aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/00-INDEX4
-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/ABI/testing/debugfs-pktcdvd6
-rw-r--r--Documentation/ABI/testing/ima_policy61
-rw-r--r--Documentation/ABI/testing/sysfs-block59
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci77
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-cciss33
-rw-r--r--Documentation/ABI/testing/sysfs-class-mtd125
-rw-r--r--Documentation/ABI/testing/sysfs-class-regulator57
-rw-r--r--Documentation/ABI/testing/sysfs-devices-cache_disable18
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-acpi8
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ext491
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-slab479
-rw-r--r--Documentation/ABI/testing/sysfs-pps73
-rw-r--r--Documentation/Changes26
-rw-r--r--Documentation/CodingStyle4
-rw-r--r--Documentation/DMA-API.txt118
-rw-r--r--Documentation/DMA-mapping.txt18
-rw-r--r--Documentation/DocBook/.gitignore4
-rw-r--r--Documentation/DocBook/Makefile20
-rw-r--r--Documentation/DocBook/alsa-driver-api.tmpl (renamed from Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl)17
-rw-r--r--Documentation/DocBook/debugobjects.tmpl2
-rw-r--r--Documentation/DocBook/genericirq.tmpl1
-rw-r--r--Documentation/DocBook/kernel-api.tmpl9
-rw-r--r--Documentation/DocBook/kgdb.tmpl2
-rw-r--r--Documentation/DocBook/mac80211.tmpl19
-rw-r--r--Documentation/DocBook/procfs_example.c9
-rw-r--r--Documentation/DocBook/tracepoint.tmpl89
-rw-r--r--Documentation/DocBook/uio-howto.tmpl29
-rw-r--r--Documentation/DocBook/writing-an-alsa-driver.tmpl (renamed from Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl)60
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt814
-rw-r--r--Documentation/PCI/pci-iov-howto.txt99
-rw-r--r--Documentation/PCI/pcieaer-howto.txt25
-rw-r--r--Documentation/RCU/listRCU.txt6
-rw-r--r--Documentation/RCU/rcu.txt2
-rw-r--r--Documentation/RCU/rculist_nulls.txt6
-rw-r--r--Documentation/RCU/trace.txt102
-rw-r--r--Documentation/SM501.txt2
-rw-r--r--Documentation/Smack.txt58
-rw-r--r--Documentation/SubmitChecklist2
-rw-r--r--Documentation/SubmittingPatches82
-rw-r--r--Documentation/accounting/getdelays.c3
-rw-r--r--Documentation/arm/Samsung-S3C24XX/GPIO.txt10
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Suspend.txt8
-rw-r--r--Documentation/arm/memory.txt9
-rw-r--r--Documentation/atomic_ops.txt4
-rw-r--r--Documentation/block/biodoc.txt21
-rw-r--r--Documentation/block/deadline-iosched.txt2
-rw-r--r--Documentation/block/switching-sched.txt6
-rw-r--r--Documentation/blockdev/00-INDEX2
-rw-r--r--Documentation/blockdev/mflash.txt84
-rw-r--r--Documentation/braille-console.txt2
-rw-r--r--Documentation/cdrom/packet-writing.txt2
-rw-r--r--Documentation/cgroups/00-INDEX18
-rw-r--r--Documentation/cgroups/cgroups.txt36
-rw-r--r--Documentation/cgroups/cpuacct.txt18
-rw-r--r--Documentation/cgroups/cpusets.txt12
-rw-r--r--Documentation/cgroups/devices.txt2
-rw-r--r--Documentation/cgroups/memcg_test.txt22
-rw-r--r--Documentation/cgroups/memory.txt71
-rw-r--r--Documentation/cgroups/resource_counter.txt27
-rw-r--r--Documentation/connector/cn_test.c7
-rw-r--r--Documentation/cpu-freq/cpu-drivers.txt2
-rw-r--r--Documentation/cpu-freq/governors.txt28
-rw-r--r--Documentation/cpu-freq/user-guide.txt13
-rw-r--r--Documentation/cputopology.txt6
-rw-r--r--Documentation/dell_rbu.txt4
-rw-r--r--Documentation/development-process/5.Posting31
-rw-r--r--Documentation/devices.txt133
-rw-r--r--Documentation/dontdiff1
-rw-r--r--Documentation/driver-model/device.txt32
-rw-r--r--Documentation/driver-model/devres.txt2
-rw-r--r--Documentation/driver-model/platform.txt59
-rw-r--r--Documentation/dvb/get_dvb_firmware93
-rw-r--r--Documentation/dynamic-debug-howto.txt240
-rw-r--r--Documentation/edac.txt8
-rw-r--r--Documentation/fault-injection/fault-injection.txt70
-rw-r--r--Documentation/fb/00-INDEX2
-rw-r--r--Documentation/fb/cyblafb/bugs13
-rw-r--r--Documentation/fb/cyblafb/credits7
-rw-r--r--Documentation/fb/cyblafb/documentation17
-rw-r--r--Documentation/fb/cyblafb/fb.modes154
-rw-r--r--Documentation/fb/cyblafb/performance79
-rw-r--r--Documentation/fb/cyblafb/todo31
-rw-r--r--Documentation/fb/cyblafb/usage217
-rw-r--r--Documentation/fb/cyblafb/whatsnew29
-rw-r--r--Documentation/fb/cyblafb/whycyblafb85
-rw-r--r--Documentation/fb/sh7760fb.txt2
-rw-r--r--Documentation/fb/uvesafb.txt7
-rw-r--r--Documentation/fb/vesafb.txt2
-rw-r--r--Documentation/feature-removal-schedule.txt186
-rw-r--r--Documentation/filesystems/00-INDEX6
-rw-r--r--Documentation/filesystems/Locking35
-rw-r--r--Documentation/filesystems/autofs4-mount-control.txt2
-rw-r--r--Documentation/filesystems/caching/backend-api.txt658
-rw-r--r--Documentation/filesystems/caching/cachefiles.txt501
-rw-r--r--Documentation/filesystems/caching/fscache.txt333
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt778
-rw-r--r--Documentation/filesystems/caching/object.txt313
-rw-r--r--Documentation/filesystems/caching/operations.txt213
-rw-r--r--Documentation/filesystems/debugfs.txt158
-rw-r--r--Documentation/filesystems/exofs.txt176
-rw-r--r--Documentation/filesystems/ext2.txt2
-rw-r--r--Documentation/filesystems/ext3.txt14
-rw-r--r--Documentation/filesystems/ext4.txt38
-rw-r--r--Documentation/filesystems/fiemap.txt2
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt2
-rw-r--r--Documentation/filesystems/gfs2.txt19
-rw-r--r--Documentation/filesystems/isofs.txt9
-rw-r--r--Documentation/filesystems/knfsd-stats.txt159
-rw-r--r--Documentation/filesystems/nfs-rdma.txt2
-rw-r--r--Documentation/filesystems/nfs41-server.txt161
-rw-r--r--Documentation/filesystems/nilfs2.txt199
-rw-r--r--Documentation/filesystems/pohmelfs/design_notes.txt71
-rw-r--r--Documentation/filesystems/pohmelfs/info.txt99
-rw-r--r--Documentation/filesystems/pohmelfs/network_protocol.txt227
-rw-r--r--Documentation/filesystems/proc.txt1403
-rw-r--r--Documentation/filesystems/sysfs-pci.txt12
-rw-r--r--Documentation/filesystems/tmpfs.txt2
-rw-r--r--Documentation/filesystems/udf.txt2
-rw-r--r--Documentation/filesystems/vfat.txt13
-rw-r--r--Documentation/filesystems/vfs.txt3
-rw-r--r--Documentation/firmware_class/README3
-rw-r--r--Documentation/ftrace.txt1424
-rw-r--r--Documentation/futex-requeue-pi.txt131
-rw-r--r--Documentation/gcov.txt246
-rw-r--r--Documentation/gpio.txt25
-rw-r--r--Documentation/hwmon/ds162151
-rw-r--r--Documentation/hwmon/f71882fg12
-rw-r--r--Documentation/hwmon/g760a36
-rw-r--r--Documentation/hwmon/ibmaem2
-rw-r--r--Documentation/hwmon/lis3lv02d20
-rw-r--r--Documentation/hwmon/ltc421550
-rw-r--r--Documentation/hwmon/pcf8591 (renamed from Documentation/i2c/chips/pcf8591)0
-rw-r--r--Documentation/hwmon/sysfs-interface47
-rw-r--r--Documentation/hwmon/tmp40142
-rw-r--r--Documentation/hwmon/w83627ehf36
-rw-r--r--Documentation/i2c/busses/i2c-nforce212
-rw-r--r--Documentation/i2c/busses/i2c-ocores17
-rw-r--r--Documentation/i2c/busses/i2c-piix42
-rw-r--r--Documentation/i2c/busses/i2c-viapro4
-rw-r--r--Documentation/i2c/instantiating-devices211
-rw-r--r--Documentation/i2c/writing-clients35
-rw-r--r--Documentation/ia64/kvm.txt2
-rw-r--r--Documentation/ide/ide.txt2
-rw-r--r--Documentation/infiniband/ipoib.txt45
-rw-r--r--Documentation/input/bcm5974.txt65
-rw-r--r--Documentation/input/input.txt2
-rw-r--r--Documentation/input/multi-touch-protocol.txt195
-rw-r--r--Documentation/input/rotary-encoder.txt108
-rw-r--r--Documentation/ioctl/ioctl-number.txt4
-rw-r--r--Documentation/isdn/00-INDEX51
-rw-r--r--Documentation/isdn/INTERFACE.CAPI299
-rw-r--r--Documentation/isdn/README.gigaset76
-rw-r--r--Documentation/ja_JP/SubmitChecklist2
-rw-r--r--Documentation/kbuild/kconfig.txt116
-rw-r--r--Documentation/kbuild/makefiles.txt93
-rw-r--r--Documentation/kbuild/modules.txt2
-rw-r--r--Documentation/kdump/kdump.txt4
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt7
-rw-r--r--Documentation/kernel-parameters.txt740
-rw-r--r--Documentation/kmemcheck.txt773
-rw-r--r--Documentation/kmemleak.txt142
-rw-r--r--Documentation/kobject.txt2
-rw-r--r--Documentation/kprobes.txt42
-rw-r--r--Documentation/laptops/acer-wmi.txt12
-rw-r--r--Documentation/laptops/sony-laptop.txt2
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt150
-rw-r--r--Documentation/lguest/.gitignore1
-rw-r--r--Documentation/lguest/Makefile3
-rw-r--r--Documentation/lguest/lguest.c1013
-rw-r--r--Documentation/lguest/lguest.txt10
-rw-r--r--Documentation/local_ops.txt2
-rw-r--r--Documentation/lockdep-design.txt36
-rw-r--r--Documentation/logo.gifbin0 -> 16335 bytes
-rw-r--r--Documentation/logo.svg2911
-rw-r--r--Documentation/logo.txt15
-rw-r--r--Documentation/md.txt37
-rw-r--r--Documentation/memory-barriers.txt129
-rw-r--r--Documentation/memory-hotplug.txt8
-rw-r--r--Documentation/misc-devices/isl2900362
-rw-r--r--Documentation/mn10300/ABI.txt2
-rw-r--r--Documentation/mtd/nand_ecc.txt12
-rw-r--r--Documentation/networking/bonding.txt8
-rw-r--r--Documentation/networking/can.txt237
-rw-r--r--Documentation/networking/dccp.txt3
-rw-r--r--Documentation/networking/dm9000.txt2
-rw-r--r--Documentation/networking/ieee802154.txt76
-rw-r--r--Documentation/networking/ip-sysctl.txt181
-rw-r--r--Documentation/networking/ipv6.txt37
-rw-r--r--Documentation/networking/ixgbe.txt199
-rw-r--r--Documentation/networking/l2tp.txt2
-rw-r--r--Documentation/networking/mac80211-injection.txt28
-rw-r--r--Documentation/networking/netdevices.txt2
-rw-r--r--Documentation/networking/operstates.txt3
-rw-r--r--Documentation/networking/packet_mmap.txt140
-rw-r--r--Documentation/networking/phonet.txt2
-rw-r--r--Documentation/networking/rds.txt356
-rw-r--r--Documentation/networking/regulatory.txt2
-rw-r--r--Documentation/networking/timestamping.txt180
-rw-r--r--Documentation/networking/timestamping/.gitignore1
-rw-r--r--Documentation/networking/timestamping/Makefile6
-rw-r--r--Documentation/networking/timestamping/timestamping.c533
-rw-r--r--Documentation/networking/vxge.txt100
-rw-r--r--Documentation/power/devices.txt34
-rw-r--r--Documentation/power/regulator/consumer.txt2
-rw-r--r--Documentation/power/regulator/overview.txt2
-rw-r--r--Documentation/power/s2ram.txt2
-rw-r--r--Documentation/power/userland-swsusp.txt2
-rw-r--r--Documentation/powerpc/booting-without-of.txt93
-rw-r--r--Documentation/powerpc/dts-bindings/can/sja1000.txt53
-rw-r--r--Documentation/powerpc/dts-bindings/ecm.txt64
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/board.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt3
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/dma.txt34
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/esdhc.txt25
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/i2c.txt46
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/mcm.txt64
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/msi-pic.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/pmc.txt4
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/ssi.txt68
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/tsec.txt6
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/upm-nand.txt39
-rw-r--r--Documentation/powerpc/dts-bindings/gpio/led.txt46
-rw-r--r--Documentation/powerpc/dts-bindings/mmc-spi-slot.txt23
-rw-r--r--Documentation/powerpc/dts-bindings/mtd-physmap.txt80
-rw-r--r--Documentation/powerpc/qe_firmware.txt2
-rw-r--r--Documentation/pps/pps.txt172
-rw-r--r--Documentation/rbtree.txt10
-rw-r--r--Documentation/rfkill.txt640
-rw-r--r--Documentation/robust-futex-ABI.txt4
-rw-r--r--Documentation/s390/Debugging390.txt4
-rw-r--r--Documentation/scheduler/00-INDEX2
-rw-r--r--Documentation/scheduler/sched-coding.txt126
-rw-r--r--Documentation/scheduler/sched-nice-design.txt2
-rw-r--r--Documentation/scheduler/sched-rt-group.txt22
-rw-r--r--Documentation/scsi/aacraid.txt15
-rw-r--r--Documentation/scsi/aic79xx.txt2
-rw-r--r--Documentation/scsi/ncr53c8xx.txt4
-rw-r--r--Documentation/scsi/osd.txt198
-rw-r--r--Documentation/scsi/scsi_fc_transport.txt14
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt5
-rw-r--r--Documentation/scsi/sym53c8xx_2.txt2
-rw-r--r--Documentation/slow-work.txt174
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt123
-rw-r--r--Documentation/sound/alsa/HD-Audio-Models.txt41
-rw-r--r--Documentation/sound/alsa/HD-Audio.txt53
-rw-r--r--Documentation/sound/alsa/Procfile.txt39
-rw-r--r--Documentation/sound/alsa/README.maya44163
-rw-r--r--Documentation/sound/alsa/hda_codec.txt2
-rw-r--r--Documentation/sound/alsa/soc/dapm.txt4
-rw-r--r--Documentation/sound/alsa/soc/jack.txt71
-rw-r--r--Documentation/sound/oss/CS423223
-rw-r--r--Documentation/sound/oss/Introduction2
-rw-r--r--Documentation/sparse.txt8
-rw-r--r--Documentation/spi/spi-summary6
-rw-r--r--Documentation/sysctl/00-INDEX2
-rw-r--r--Documentation/sysctl/fs.txt74
-rw-r--r--Documentation/sysctl/kernel.txt64
-rw-r--r--Documentation/sysctl/net.txt175
-rw-r--r--Documentation/sysctl/vm.txt31
-rw-r--r--Documentation/sysfs-rules.txt2
-rw-r--r--Documentation/sysrq.txt7
-rw-r--r--Documentation/timers/hpet.txt2
-rw-r--r--Documentation/timers/timer_stats.txt2
-rw-r--r--Documentation/tomoyo.txt55
-rw-r--r--Documentation/trace/events.txt90
-rw-r--r--Documentation/trace/ftrace.txt1888
-rw-r--r--Documentation/trace/kmemtrace.txt126
-rw-r--r--Documentation/trace/mmiotrace.txt (renamed from Documentation/tracers/mmiotrace.txt)26
-rw-r--r--Documentation/trace/power.txt17
-rw-r--r--Documentation/trace/tracepoints.txt (renamed from Documentation/tracepoints.txt)21
-rw-r--r--Documentation/usb/WUSB-Design-overview.txt8
-rw-r--r--Documentation/usb/anchors.txt4
-rw-r--r--Documentation/usb/callbacks.txt2
-rw-r--r--Documentation/usb/usbmon.txt27
-rw-r--r--Documentation/video4linux/CARDLIST.bttv6
-rw-r--r--Documentation/video4linux/CARDLIST.cx238859
-rw-r--r--Documentation/video4linux/CARDLIST.cx883
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx15
-rw-r--r--Documentation/video4linux/CARDLIST.saa713422
-rw-r--r--Documentation/video4linux/CARDLIST.tuner2
-rw-r--r--Documentation/video4linux/Zoran3
-rw-r--r--Documentation/video4linux/bttv/Insmod-options10
-rw-r--r--Documentation/video4linux/bttv/README4
-rw-r--r--Documentation/video4linux/cx18.txt2
-rw-r--r--Documentation/video4linux/cx2341x/README.hm124
-rw-r--r--Documentation/video4linux/gspca.txt16
-rw-r--r--Documentation/video4linux/pxa_camera.txt174
-rw-r--r--Documentation/video4linux/si470x.txt11
-rw-r--r--Documentation/video4linux/v4l2-framework.txt207
-rw-r--r--Documentation/video4linux/v4lgrab.c4
-rw-r--r--Documentation/video4linux/zr364xx.txt1
-rw-r--r--Documentation/vm/00-INDEX2
-rw-r--r--Documentation/vm/Makefile2
-rw-r--r--Documentation/vm/active_mm.txt83
-rw-r--r--Documentation/vm/balance18
-rw-r--r--Documentation/vm/numa_memory_policy.txt3
-rw-r--r--Documentation/vm/page-types.c698
-rw-r--r--Documentation/vm/page_migration3
-rw-r--r--Documentation/vm/pagemap.txt68
-rw-r--r--Documentation/vm/unevictable-lru.txt1041
-rw-r--r--Documentation/watchdog/hpwdt.txt84
-rw-r--r--Documentation/x86/boot.txt138
-rw-r--r--Documentation/x86/earlyprintk.txt101
-rw-r--r--Documentation/x86/x86_64/boot-options.txt49
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets5
-rw-r--r--Documentation/x86/x86_64/machinecheck8
-rw-r--r--Documentation/x86/x86_64/mm.txt9
312 files changed, 19996 insertions, 10064 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 2a39aeba1464..d05737aaa84b 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -86,6 +86,8 @@ cachetlb.txt
86 - describes the cache/TLB flushing interfaces Linux uses. 86 - describes the cache/TLB flushing interfaces Linux uses.
87cdrom/ 87cdrom/
88 - directory with information on the CD-ROM drivers that Linux has. 88 - directory with information on the CD-ROM drivers that Linux has.
89cgroups/
90 - cgroups features, including cpusets and memory controller.
89connector/ 91connector/
90 - docs on the netlink based userspace<->kernel space communication mod. 92 - docs on the netlink based userspace<->kernel space communication mod.
91console/ 93console/
@@ -98,8 +100,6 @@ cpu-load.txt
98 - document describing how CPU load statistics are collected. 100 - document describing how CPU load statistics are collected.
99cpuidle/ 101cpuidle/
100 - info on CPU_IDLE, CPU idle state management subsystem. 102 - info on CPU_IDLE, CPU idle state management subsystem.
101cpusets.txt
102 - documents the cpusets feature; assign CPUs and Mem to a set of tasks.
103cputopology.txt 103cputopology.txt
104 - documentation on how CPU topology info is exported via sysfs. 104 - documentation on how CPU topology info is exported via sysfs.
105cris/ 105cris/
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 000000000000..5e6a92a02d85
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
1What: /sys/kernel/debug/kmemtrace/
2Date: July 2008
3Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
4Description:
5
6In kmemtrace-enabled kernels, the following files are created:
7
8/sys/kernel/debug/kmemtrace/
9 cpu<n> (0400) Per-CPU tracing data, see below. (binary)
10 total_overruns (0400) Total number of bytes which were dropped from
11 cpu<n> files because of full buffer condition,
12 non-binary. (text)
13 abi_version (0400) Kernel's kmemtrace ABI version. (text)
14
15Each per-CPU file should be read according to the relay interface. That is,
16the reader should set affinity to that specific CPU and, as currently done by
17the userspace application (though there are other methods), use poll() with
18an infinite timeout before every read(). Otherwise, erroneous data may be
19read. The binary data has the following _core_ format:
20
21 Event ID (1 byte) Unsigned integer, one of:
22 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
23 1 - represents a freeing of previously allocated memory
24 (KMEMTRACE_EVENT_FREE)
25 Type ID (1 byte) Unsigned integer, one of:
26 0 - this is a kmalloc() / kfree()
27 1 - this is a kmem_cache_alloc() / kmem_cache_free()
28 2 - this is a __get_free_pages() et al.
29 Event size (2 bytes) Unsigned integer representing the
30 size of this event. Used to extend
31 kmemtrace. Discard the bytes you
32 don't know about.
33 Sequence number (4 bytes) Signed integer used to reorder data
34 logged on SMP machines. Wraparound
35 must be taken into account, although
36 it is unlikely.
37 Caller address (8 bytes) Return address to the caller.
38 Pointer to mem (8 bytes) Pointer to target memory area. Can be
39 NULL, but not all such calls might be
40 recorded.
41
42In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
43
44 Requested bytes (8 bytes) Total number of requested bytes,
45 unsigned, must not be zero.
46 Allocated bytes (8 bytes) Total number of actually allocated
47 bytes, unsigned, must not be lower
48 than requested bytes.
49 Requested flags (4 bytes) GFP flags supplied by the caller.
50 Target CPU (4 bytes) Signed integer, valid for event id 1.
51 If equal to -1, target CPU is the same
52 as origin CPU, but the reverse might
53 not be true.
54
55The data is made available in the same endianness the machine has.
56
57Other event ids and type ids may be defined and added. Other fields may be
58added by increasing event size, but see below for details.
59Every modification to the ABI, including new id definitions, are followed
60by bumping the ABI version by one.
61
62Adding new data to the packet (features) is done at the end of the mandatory
63data:
64 Feature size (2 byte)
65 Feature ID (1 byte)
66 Feature data (Feature size - 3 bytes)
67
68
69Users:
70 kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
71
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
index bf9c16b64c34..cf11736acb76 100644
--- a/Documentation/ABI/testing/debugfs-pktcdvd
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -1,4 +1,4 @@
1What: /debug/pktcdvd/pktcdvd[0-7] 1What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
2Date: Oct. 2006 2Date: Oct. 2006
3KernelVersion: 2.6.20 3KernelVersion: 2.6.20
4Contact: Thomas Maier <balagi@justmail.de> 4Contact: Thomas Maier <balagi@justmail.de>
@@ -10,10 +10,10 @@ debugfs interface
10The pktcdvd module (packet writing driver) creates 10The pktcdvd module (packet writing driver) creates
11these files in debugfs: 11these files in debugfs:
12 12
13/debug/pktcdvd/pktcdvd[0-7]/ 13/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
14 info (0444) Lots of driver statistics and infos. 14 info (0444) Lots of driver statistics and infos.
15 15
16Example: 16Example:
17------- 17-------
18 18
19cat /debug/pktcdvd/pktcdvd0/info 19cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
new file mode 100644
index 000000000000..6434f0df012e
--- /dev/null
+++ b/Documentation/ABI/testing/ima_policy
@@ -0,0 +1,61 @@
1What: security/ima/policy
2Date: May 2008
3Contact: Mimi Zohar <zohar@us.ibm.com>
4Description:
5 The Trusted Computing Group(TCG) runtime Integrity
6 Measurement Architecture(IMA) maintains a list of hash
7 values of executables and other sensitive system files
8 loaded into the run-time of this system. At runtime,
9 the policy can be constrained based on LSM specific data.
10 Policies are loaded into the securityfs file ima/policy
11 by opening the file, writing the rules one at a time and
12 then closing the file. The new policy takes effect after
13 the file ima/policy is closed.
14
15 rule format: action [condition ...]
16
17 action: measure | dont_measure
18 condition:= base | lsm
19 base: [[func=] [mask=] [fsmagic=] [uid=]]
20 lsm: [[subj_user=] [subj_role=] [subj_type=]
21 [obj_user=] [obj_role=] [obj_type=]]
22
23 base: func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION]
24 mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
25 fsmagic:= hex value
26 uid:= decimal value
27 lsm: are LSM specific
28
29 default policy:
30 # PROC_SUPER_MAGIC
31 dont_measure fsmagic=0x9fa0
32 # SYSFS_MAGIC
33 dont_measure fsmagic=0x62656572
34 # DEBUGFS_MAGIC
35 dont_measure fsmagic=0x64626720
36 # TMPFS_MAGIC
37 dont_measure fsmagic=0x01021994
38 # SECURITYFS_MAGIC
39 dont_measure fsmagic=0x73636673
40
41 measure func=BPRM_CHECK
42 measure func=FILE_MMAP mask=MAY_EXEC
43 measure func=INODE_PERM mask=MAY_READ uid=0
44
45 The default policy measures all executables in bprm_check,
46 all files mmapped executable in file_mmap, and all files
47 open for read by root in inode_permission.
48
49 Examples of LSM specific definitions:
50
51 SELinux:
52 # SELINUX_MAGIC
53 dont_measure fsmagic=0xF97CFF8C
54
55 dont_measure obj_type=var_log_t
56 dont_measure obj_type=auditd_log_t
57 measure subj_user=system_u func=INODE_PERM mask=MAY_READ
58 measure subj_role=system_r func=INODE_PERM mask=MAY_READ
59
60 Smack:
61 measure subj_user=_ func=INODE_PERM mask=MAY_READ
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 44f52a4f5903..cbbd3e069945 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -60,3 +60,62 @@ Description:
60 Indicates whether the block layer should automatically 60 Indicates whether the block layer should automatically
61 generate checksums for write requests bound for 61 generate checksums for write requests bound for
62 devices that support receiving integrity metadata. 62 devices that support receiving integrity metadata.
63
64What: /sys/block/<disk>/alignment_offset
65Date: April 2009
66Contact: Martin K. Petersen <martin.petersen@oracle.com>
67Description:
68 Storage devices may report a physical block size that is
69 bigger than the logical block size (for instance a drive
70 with 4KB physical sectors exposing 512-byte logical
71 blocks to the operating system). This parameter
72 indicates how many bytes the beginning of the device is
73 offset from the disk's natural alignment.
74
75What: /sys/block/<disk>/<partition>/alignment_offset
76Date: April 2009
77Contact: Martin K. Petersen <martin.petersen@oracle.com>
78Description:
79 Storage devices may report a physical block size that is
80 bigger than the logical block size (for instance a drive
81 with 4KB physical sectors exposing 512-byte logical
82 blocks to the operating system). This parameter
83 indicates how many bytes the beginning of the partition
84 is offset from the disk's natural alignment.
85
86What: /sys/block/<disk>/queue/logical_block_size
87Date: May 2009
88Contact: Martin K. Petersen <martin.petersen@oracle.com>
89Description:
90 This is the smallest unit the storage device can
91 address. It is typically 512 bytes.
92
93What: /sys/block/<disk>/queue/physical_block_size
94Date: May 2009
95Contact: Martin K. Petersen <martin.petersen@oracle.com>
96Description:
97 This is the smallest unit the storage device can write
98 without resorting to read-modify-write operation. It is
99 usually the same as the logical block size but may be
100 bigger. One example is SATA drives with 4KB sectors
101 that expose a 512-byte logical block size to the
102 operating system.
103
104What: /sys/block/<disk>/queue/minimum_io_size
105Date: April 2009
106Contact: Martin K. Petersen <martin.petersen@oracle.com>
107Description:
108 Storage devices may report a preferred minimum I/O size,
109 which is the smallest request the device can perform
110 without incurring a read-modify-write penalty. For disk
111 drives this is often the physical block size. For RAID
112 arrays it is often the stripe chunk size.
113
114What: /sys/block/<disk>/queue/optimal_io_size
115Date: April 2009
116Contact: Martin K. Petersen <martin.petersen@oracle.com>
117Description:
118 Storage devices may report an optimal I/O size, which is
119 the device's preferred unit of receiving I/O. This is
120 rarely reported for disk drives. For RAID devices it is
121 usually the stripe width or the internal block size.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index e638e15a8895..6bf68053e4b8 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -41,6 +41,49 @@ Description:
41 for the device and attempt to bind to it. For example: 41 for the device and attempt to bind to it. For example:
42 # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id 42 # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
43 43
44What: /sys/bus/pci/drivers/.../remove_id
45Date: February 2009
46Contact: Chris Wright <chrisw@sous-sol.org>
47Description:
48 Writing a device ID to this file will remove an ID
49 that was dynamically added via the new_id sysfs entry.
50 The format for the device ID is:
51 VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device
52 ID, Subsystem Vendor ID, Subsystem Device ID, Class,
53 and Class Mask. The Vendor ID and Device ID fields are
54 required, the rest are optional. After successfully
55 removing an ID, the driver will no longer support the
56 device. This is useful to ensure auto probing won't
57 match the driver to the device. For example:
58 # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
59
60What: /sys/bus/pci/rescan
61Date: January 2009
62Contact: Linux PCI developers <linux-pci@vger.kernel.org>
63Description:
64 Writing a non-zero value to this attribute will
65 force a rescan of all PCI buses in the system, and
66 re-discover previously removed devices.
67 Depends on CONFIG_HOTPLUG.
68
69What: /sys/bus/pci/devices/.../remove
70Date: January 2009
71Contact: Linux PCI developers <linux-pci@vger.kernel.org>
72Description:
73 Writing a non-zero value to this attribute will
74 hot-remove the PCI device and any of its children.
75 Depends on CONFIG_HOTPLUG.
76
77What: /sys/bus/pci/devices/.../rescan
78Date: January 2009
79Contact: Linux PCI developers <linux-pci@vger.kernel.org>
80Description:
81 Writing a non-zero value to this attribute will
82 force a rescan of the device's parent bus and all
83 child buses, and re-discover devices removed earlier
84 from this part of the device tree.
85 Depends on CONFIG_HOTPLUG.
86
44What: /sys/bus/pci/devices/.../vpd 87What: /sys/bus/pci/devices/.../vpd
45Date: February 2008 88Date: February 2008
46Contact: Ben Hutchings <bhutchings@solarflare.com> 89Contact: Ben Hutchings <bhutchings@solarflare.com>
@@ -52,3 +95,37 @@ Description:
52 that some devices may have malformatted data. If the 95 that some devices may have malformatted data. If the
53 underlying VPD has a writable section then the 96 underlying VPD has a writable section then the
54 corresponding section of this file will be writable. 97 corresponding section of this file will be writable.
98
99What: /sys/bus/pci/devices/.../virtfnN
100Date: March 2009
101Contact: Yu Zhao <yu.zhao@intel.com>
102Description:
103 This symbolic link appears when hardware supports the SR-IOV
104 capability and the Physical Function driver has enabled it.
105 The symbolic link points to the PCI device sysfs entry of the
106 Virtual Function whose index is N (0...MaxVFs-1).
107
108What: /sys/bus/pci/devices/.../dep_link
109Date: March 2009
110Contact: Yu Zhao <yu.zhao@intel.com>
111Description:
112 This symbolic link appears when hardware supports the SR-IOV
113 capability and the Physical Function driver has enabled it,
114 and this device has vendor specific dependencies with others.
115 The symbolic link points to the PCI device sysfs entry of
116 Physical Function this device depends on.
117
118What: /sys/bus/pci/devices/.../physfn
119Date: March 2009
120Contact: Yu Zhao <yu.zhao@intel.com>
121Description:
122 This symbolic link appears when a device is a Virtual Function.
123 The symbolic link points to the PCI device sysfs entry of the
124 Physical Function this device associates with.
125
126What: /sys/bus/pci/slots/.../module
127Date: June 2009
128Contact: linux-pci@vger.kernel.org
129Description:
130 This symbolic link points to the PCI hotplug controller driver
131 module that manages the hotplug slot.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
new file mode 100644
index 000000000000..0a92a7c93a62
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -0,0 +1,33 @@
1Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model
2Date: March 2009
3Kernel Version: 2.6.30
4Contact: iss_storagedev@hp.com
5Description: Displays the SCSI INQUIRY page 0 model for logical drive
6 Y of controller X.
7
8Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
9Date: March 2009
10Kernel Version: 2.6.30
11Contact: iss_storagedev@hp.com
12Description: Displays the SCSI INQUIRY page 0 revision for logical
13 drive Y of controller X.
14
15Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
16Date: March 2009
17Kernel Version: 2.6.30
18Contact: iss_storagedev@hp.com
19Description: Displays the SCSI INQUIRY page 83 serial number for logical
20 drive Y of controller X.
21
22Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
23Date: March 2009
24Kernel Version: 2.6.30
25Contact: iss_storagedev@hp.com
26Description: Displays the SCSI INQUIRY page 0 vendor for logical drive
27 Y of controller X.
28
29Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
30Date: March 2009
31Kernel Version: 2.6.30
32Contact: iss_storagedev@hp.com
33Description: A symbolic link to /sys/block/cciss!cXdY
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
new file mode 100644
index 000000000000..4d55a1888981
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -0,0 +1,125 @@
1What: /sys/class/mtd/
2Date: April 2009
3KernelVersion: 2.6.29
4Contact: linux-mtd@lists.infradead.org
5Description:
6 The mtd/ class subdirectory belongs to the MTD subsystem
7 (MTD core).
8
9What: /sys/class/mtd/mtdX/
10Date: April 2009
11KernelVersion: 2.6.29
12Contact: linux-mtd@lists.infradead.org
13Description:
14 The /sys/class/mtd/mtd{0,1,2,3,...} directories correspond
15 to each /dev/mtdX character device. These may represent
16 physical/simulated flash devices, partitions on a flash
17 device, or concatenated flash devices. They exist regardless
18 of whether CONFIG_MTD_CHAR is actually enabled.
19
20What: /sys/class/mtd/mtdXro/
21Date: April 2009
22KernelVersion: 2.6.29
23Contact: linux-mtd@lists.infradead.org
24Description:
25 These directories provide the corresponding read-only device
26 nodes for /sys/class/mtd/mtdX/ . They are only created
27 (for the benefit of udev) if CONFIG_MTD_CHAR is enabled.
28
29What: /sys/class/mtd/mtdX/dev
30Date: April 2009
31KernelVersion: 2.6.29
32Contact: linux-mtd@lists.infradead.org
33Description:
34 Major and minor numbers of the character device corresponding
35 to this MTD device (in <major>:<minor> format). This is the
36 read-write device so <minor> will be even.
37
38What: /sys/class/mtd/mtdXro/dev
39Date: April 2009
40KernelVersion: 2.6.29
41Contact: linux-mtd@lists.infradead.org
42Description:
43 Major and minor numbers of the character device corresponding
44 to the read-only variant of thie MTD device (in
45 <major>:<minor> format). In this case <minor> will be odd.
46
47What: /sys/class/mtd/mtdX/erasesize
48Date: April 2009
49KernelVersion: 2.6.29
50Contact: linux-mtd@lists.infradead.org
51Description:
52 "Major" erase size for the device. If numeraseregions is
53 zero, this is the eraseblock size for the entire device.
54 Otherwise, the MEMGETREGIONCOUNT/MEMGETREGIONINFO ioctls
55 can be used to determine the actual eraseblock layout.
56
57What: /sys/class/mtd/mtdX/flags
58Date: April 2009
59KernelVersion: 2.6.29
60Contact: linux-mtd@lists.infradead.org
61Description:
62 A hexadecimal value representing the device flags, ORed
63 together:
64
65 0x0400: MTD_WRITEABLE - device is writable
66 0x0800: MTD_BIT_WRITEABLE - single bits can be flipped
67 0x1000: MTD_NO_ERASE - no erase necessary
68 0x2000: MTD_POWERUP_LOCK - always locked after reset
69
70What: /sys/class/mtd/mtdX/name
71Date: April 2009
72KernelVersion: 2.6.29
73Contact: linux-mtd@lists.infradead.org
74Description:
75 A human-readable ASCII name for the device or partition.
76 This will match the name in /proc/mtd .
77
78What: /sys/class/mtd/mtdX/numeraseregions
79Date: April 2009
80KernelVersion: 2.6.29
81Contact: linux-mtd@lists.infradead.org
82Description:
83 For devices that have variable eraseblock sizes, this
84 provides the total number of erase regions. Otherwise,
85 it will read back as zero.
86
87What: /sys/class/mtd/mtdX/oobsize
88Date: April 2009
89KernelVersion: 2.6.29
90Contact: linux-mtd@lists.infradead.org
91Description:
92 Number of OOB bytes per page.
93
94What: /sys/class/mtd/mtdX/size
95Date: April 2009
96KernelVersion: 2.6.29
97Contact: linux-mtd@lists.infradead.org
98Description:
99 Total size of the device/partition, in bytes.
100
101What: /sys/class/mtd/mtdX/type
102Date: April 2009
103KernelVersion: 2.6.29
104Contact: linux-mtd@lists.infradead.org
105Description:
106 One of the following ASCII strings, representing the device
107 type:
108
109 absent, ram, rom, nor, nand, dataflash, ubi, unknown
110
111What: /sys/class/mtd/mtdX/writesize
112Date: April 2009
113KernelVersion: 2.6.29
114Contact: linux-mtd@lists.infradead.org
115Description:
116 Minimal writable flash unit size. This will always be
117 a positive integer.
118
119 In the case of NOR flash it is 1 (even though individual
120 bits can be cleared).
121
122 In the case of NAND flash it is one NAND page (or a
123 half page, or a quarter page).
124
125 In the case of ECC NOR, it is the ECC block size.
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator
index 873ef1fc1569..e091fa873792 100644
--- a/Documentation/ABI/testing/sysfs-class-regulator
+++ b/Documentation/ABI/testing/sysfs-class-regulator
@@ -4,8 +4,8 @@ KernelVersion: 2.6.26
4Contact: Liam Girdwood <lrg@slimlogic.co.uk> 4Contact: Liam Girdwood <lrg@slimlogic.co.uk>
5Description: 5Description:
6 Some regulator directories will contain a field called 6 Some regulator directories will contain a field called
7 state. This reports the regulator enable status, for 7 state. This reports the regulator enable control, for
8 regulators which can report that value. 8 regulators which can report that input value.
9 9
10 This will be one of the following strings: 10 This will be one of the following strings:
11 11
@@ -14,16 +14,54 @@ Description:
14 'unknown' 14 'unknown'
15 15
16 'enabled' means the regulator output is ON and is supplying 16 'enabled' means the regulator output is ON and is supplying
17 power to the system. 17 power to the system (assuming no error prevents it).
18 18
19 'disabled' means the regulator output is OFF and is not 19 'disabled' means the regulator output is OFF and is not
20 supplying power to the system.. 20 supplying power to the system (unless some non-Linux
21 control has enabled it).
21 22
22 'unknown' means software cannot determine the state, or 23 'unknown' means software cannot determine the state, or
23 the reported state is invalid. 24 the reported state is invalid.
24 25
25 NOTE: this field can be used in conjunction with microvolts 26 NOTE: this field can be used in conjunction with microvolts
26 and microamps to determine regulator output levels. 27 or microamps to determine configured regulator output levels.
28
29
30What: /sys/class/regulator/.../status
31Description:
32 Some regulator directories will contain a field called
33 "status". This reports the current regulator status, for
34 regulators which can report that output value.
35
36 This will be one of the following strings:
37
38 off
39 on
40 error
41 fast
42 normal
43 idle
44 standby
45
46 "off" means the regulator is not supplying power to the
47 system.
48
49 "on" means the regulator is supplying power to the system,
50 and the regulator can't report a detailed operation mode.
51
52 "error" indicates an out-of-regulation status such as being
53 disabled due to thermal shutdown, or voltage being unstable
54 because of problems with the input power supply.
55
56 "fast", "normal", "idle", and "standby" are all detailed
57 regulator operation modes (described elsewhere). They
58 imply "on", but provide more detail.
59
60 Note that regulator status is a function of many inputs,
61 not limited to control inputs from Linux. For example,
62 the actual load presented may trigger "error" status; or
63 a regulator may be enabled by another user, even though
64 Linux did not enable it.
27 65
28 66
29What: /sys/class/regulator/.../type 67What: /sys/class/regulator/.../type
@@ -58,7 +96,7 @@ Description:
58 Some regulator directories will contain a field called 96 Some regulator directories will contain a field called
59 microvolts. This holds the regulator output voltage setting 97 microvolts. This holds the regulator output voltage setting
60 measured in microvolts (i.e. E-6 Volts), for regulators 98 measured in microvolts (i.e. E-6 Volts), for regulators
61 which can report that voltage. 99 which can report the control input for voltage.
62 100
63 NOTE: This value should not be used to determine the regulator 101 NOTE: This value should not be used to determine the regulator
64 output voltage level as this value is the same regardless of 102 output voltage level as this value is the same regardless of
@@ -73,7 +111,7 @@ Description:
73 Some regulator directories will contain a field called 111 Some regulator directories will contain a field called
74 microamps. This holds the regulator output current limit 112 microamps. This holds the regulator output current limit
75 setting measured in microamps (i.e. E-6 Amps), for regulators 113 setting measured in microamps (i.e. E-6 Amps), for regulators
76 which can report that current. 114 which can report the control input for a current limit.
77 115
78 NOTE: This value should not be used to determine the regulator 116 NOTE: This value should not be used to determine the regulator
79 output current level as this value is the same regardless of 117 output current level as this value is the same regardless of
@@ -87,7 +125,7 @@ Contact: Liam Girdwood <lrg@slimlogic.co.uk>
87Description: 125Description:
88 Some regulator directories will contain a field called 126 Some regulator directories will contain a field called
89 opmode. This holds the current regulator operating mode, 127 opmode. This holds the current regulator operating mode,
90 for regulators which can report it. 128 for regulators which can report that control input value.
91 129
92 The opmode value can be one of the following strings: 130 The opmode value can be one of the following strings:
93 131
@@ -101,7 +139,8 @@ Description:
101 139
102 NOTE: This value should not be used to determine the regulator 140 NOTE: This value should not be used to determine the regulator
103 output operating mode as this value is the same regardless of 141 output operating mode as this value is the same regardless of
104 whether the regulator is enabled or disabled. 142 whether the regulator is enabled or disabled. A "status"
143 attribute may be available to determine the actual mode.
105 144
106 145
107What: /sys/class/regulator/.../min_microvolts 146What: /sys/class/regulator/.../min_microvolts
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644
index 000000000000..175bb4f70512
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable
@@ -0,0 +1,18 @@
1What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
2Date: August 2008
3KernelVersion: 2.6.27
4Contact: mark.langsdorf@amd.com
5Description: These files exist in every cpu's cache index directories.
6 There are currently 2 cache_disable_# files in each
7 directory. Reading from these files on a supported
8 processor will return that cache disable index value
9 for that processor and node. Writing to one of these
10 files will cause the specificed cache index to be disabled.
11
12 Currently, only AMD Family 10h Processors support cache index
13 disable, and only for their L3 caches. See the BIOS and
14 Kernel Developer's Guide at
15 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
16 for formatting information and other details on the
17 cache index disable.
18Users: joachim.deguara@amd.com
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
index e8ffc70ffe12..4f9ba3c2fca7 100644
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -69,9 +69,13 @@ Description:
69 gpe1F: 0 invalid 69 gpe1F: 0 invalid
70 gpe_all: 1192 70 gpe_all: 1192
71 sci: 1194 71 sci: 1194
72 sci_not: 0
72 73
73 sci - The total number of times the ACPI SCI 74 sci - The number of times the ACPI SCI
74 has claimed an interrupt. 75 has been called and claimed an interrupt.
76
77 sci_not - The number of times the ACPI SCI
78 has been called and NOT claimed an interrupt.
75 79
76 gpe_all - count of SCI caused by GPEs. 80 gpe_all - count of SCI caused by GPEs.
77 81
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
new file mode 100644
index 000000000000..5fb709997d96
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -0,0 +1,91 @@
1What: /sys/fs/ext4/<disk>/mb_stats
2Date: March 2008
3Contact: "Theodore Ts'o" <tytso@mit.edu>
4Description:
5 Controls whether the multiblock allocator should
6 collect statistics, which are shown during the unmount.
7 1 means to collect statistics, 0 means not to collect
8 statistics
9
10What: /sys/fs/ext4/<disk>/mb_group_prealloc
11Date: March 2008
12Contact: "Theodore Ts'o" <tytso@mit.edu>
13Description:
14 The multiblock allocator will round up allocation
15 requests to a multiple of this tuning parameter if the
16 stripe size is not set in the ext4 superblock
17
18What: /sys/fs/ext4/<disk>/mb_max_to_scan
19Date: March 2008
20Contact: "Theodore Ts'o" <tytso@mit.edu>
21Description:
22 The maximum number of extents the multiblock allocator
23 will search to find the best extent
24
25What: /sys/fs/ext4/<disk>/mb_min_to_scan
26Date: March 2008
27Contact: "Theodore Ts'o" <tytso@mit.edu>
28Description:
29 The minimum number of extents the multiblock allocator
30 will search to find the best extent
31
32What: /sys/fs/ext4/<disk>/mb_order2_req
33Date: March 2008
34Contact: "Theodore Ts'o" <tytso@mit.edu>
35Description:
36 Tuning parameter which controls the minimum size for
37 requests (as a power of 2) where the buddy cache is
38 used
39
40What: /sys/fs/ext4/<disk>/mb_stream_req
41Date: March 2008
42Contact: "Theodore Ts'o" <tytso@mit.edu>
43Description:
44 Files which have fewer blocks than this tunable
45 parameter will have their blocks allocated out of a
46 block group specific preallocation pool, so that small
47 files are packed closely together. Each large file
48 will have its blocks allocated out of its own unique
49 preallocation pool.
50
51What: /sys/fs/ext4/<disk>/inode_readahead
52Date: March 2008
53Contact: "Theodore Ts'o" <tytso@mit.edu>
54Description:
55 Tuning parameter which controls the maximum number of
56 inode table blocks that ext4's inode table readahead
57 algorithm will pre-read into the buffer cache
58
59What: /sys/fs/ext4/<disk>/delayed_allocation_blocks
60Date: March 2008
61Contact: "Theodore Ts'o" <tytso@mit.edu>
62Description:
63 This file is read-only and shows the number of blocks
64 that are dirty in the page cache, but which do not
65 have their location in the filesystem allocated yet.
66
67What: /sys/fs/ext4/<disk>/lifetime_write_kbytes
68Date: March 2008
69Contact: "Theodore Ts'o" <tytso@mit.edu>
70Description:
71 This file is read-only and shows the number of kilobytes
72 of data that have been written to this filesystem since it was
73 created.
74
75What: /sys/fs/ext4/<disk>/session_write_kbytes
76Date: March 2008
77Contact: "Theodore Ts'o" <tytso@mit.edu>
78Description:
79 This file is read-only and shows the number of
80 kilobytes of data that have been written to this
81 filesystem since it was mounted.
82
83What: /sys/fs/ext4/<disk>/inode_goal
84Date: June 2008
85Contact: "Theodore Ts'o" <tytso@mit.edu>
86Description:
87 Tuning parameter which (if non-zero) controls the goal
88 inode used by the inode allocator in p0reference to
89 all other allocation hueristics. This is intended for
90 debugging use only, and should be 0 on production
91 systems.
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
new file mode 100644
index 000000000000..6dcf75e594fb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -0,0 +1,479 @@
1What: /sys/kernel/slab
2Date: May 2007
3KernelVersion: 2.6.22
4Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
5 Christoph Lameter <cl@linux-foundation.org>
6Description:
7 The /sys/kernel/slab directory contains a snapshot of the
8 internal state of the SLUB allocator for each cache. Certain
9 files may be modified to change the behavior of the cache (and
10 any cache it aliases, if any).
11Users: kernel memory tuning tools
12
13What: /sys/kernel/slab/cache/aliases
14Date: May 2007
15KernelVersion: 2.6.22
16Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
17 Christoph Lameter <cl@linux-foundation.org>
18Description:
19 The aliases file is read-only and specifies how many caches
20 have merged into this cache.
21
22What: /sys/kernel/slab/cache/align
23Date: May 2007
24KernelVersion: 2.6.22
25Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
26 Christoph Lameter <cl@linux-foundation.org>
27Description:
28 The align file is read-only and specifies the cache's object
29 alignment in bytes.
30
31What: /sys/kernel/slab/cache/alloc_calls
32Date: May 2007
33KernelVersion: 2.6.22
34Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
35 Christoph Lameter <cl@linux-foundation.org>
36Description:
37 The alloc_calls file is read-only and lists the kernel code
38 locations from which allocations for this cache were performed.
39 The alloc_calls file only contains information if debugging is
40 enabled for that cache (see Documentation/vm/slub.txt).
41
42What: /sys/kernel/slab/cache/alloc_fastpath
43Date: February 2008
44KernelVersion: 2.6.25
45Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
46 Christoph Lameter <cl@linux-foundation.org>
47Description:
48 The alloc_fastpath file is read-only and specifies how many
49 objects have been allocated using the fast path.
50 Available when CONFIG_SLUB_STATS is enabled.
51
52What: /sys/kernel/slab/cache/alloc_from_partial
53Date: February 2008
54KernelVersion: 2.6.25
55Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
56 Christoph Lameter <cl@linux-foundation.org>
57Description:
58 The alloc_from_partial file is read-only and specifies how
59 many times a cpu slab has been full and it has been refilled
60 by using a slab from the list of partially used slabs.
61 Available when CONFIG_SLUB_STATS is enabled.
62
63What: /sys/kernel/slab/cache/alloc_refill
64Date: February 2008
65KernelVersion: 2.6.25
66Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
67 Christoph Lameter <cl@linux-foundation.org>
68Description:
69 The alloc_refill file is read-only and specifies how many
70 times the per-cpu freelist was empty but there were objects
71 available as the result of remote cpu frees.
72 Available when CONFIG_SLUB_STATS is enabled.
73
74What: /sys/kernel/slab/cache/alloc_slab
75Date: February 2008
76KernelVersion: 2.6.25
77Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
78 Christoph Lameter <cl@linux-foundation.org>
79Description:
80 The alloc_slab file is read-only and specifies how many times
81 a new slab had to be allocated from the page allocator.
82 Available when CONFIG_SLUB_STATS is enabled.
83
84What: /sys/kernel/slab/cache/alloc_slowpath
85Date: February 2008
86KernelVersion: 2.6.25
87Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
88 Christoph Lameter <cl@linux-foundation.org>
89Description:
90 The alloc_slowpath file is read-only and specifies how many
91 objects have been allocated using the slow path because of a
92 refill or allocation from a partial or new slab.
93 Available when CONFIG_SLUB_STATS is enabled.
94
95What: /sys/kernel/slab/cache/cache_dma
96Date: May 2007
97KernelVersion: 2.6.22
98Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
99 Christoph Lameter <cl@linux-foundation.org>
100Description:
101 The cache_dma file is read-only and specifies whether objects
102 are from ZONE_DMA.
103 Available when CONFIG_ZONE_DMA is enabled.
104
105What: /sys/kernel/slab/cache/cpu_slabs
106Date: May 2007
107KernelVersion: 2.6.22
108Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
109 Christoph Lameter <cl@linux-foundation.org>
110Description:
111 The cpu_slabs file is read-only and displays how many cpu slabs
112 are active and their NUMA locality.
113
114What: /sys/kernel/slab/cache/cpuslab_flush
115Date: April 2009
116KernelVersion: 2.6.31
117Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
118 Christoph Lameter <cl@linux-foundation.org>
119Description:
120 The file cpuslab_flush is read-only and specifies how many
121 times a cache's cpu slabs have been flushed as the result of
122 destroying or shrinking a cache, a cpu going offline, or as
123 the result of forcing an allocation from a certain node.
124 Available when CONFIG_SLUB_STATS is enabled.
125
126What: /sys/kernel/slab/cache/ctor
127Date: May 2007
128KernelVersion: 2.6.22
129Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
130 Christoph Lameter <cl@linux-foundation.org>
131Description:
132 The ctor file is read-only and specifies the cache's object
133 constructor function, which is invoked for each object when a
134 new slab is allocated.
135
136What: /sys/kernel/slab/cache/deactivate_empty
137Date: February 2008
138KernelVersion: 2.6.25
139Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
140 Christoph Lameter <cl@linux-foundation.org>
141Description:
142 The file deactivate_empty is read-only and specifies how many
143 times an empty cpu slab was deactivated.
144 Available when CONFIG_SLUB_STATS is enabled.
145
146What: /sys/kernel/slab/cache/deactivate_full
147Date: February 2008
148KernelVersion: 2.6.25
149Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
150 Christoph Lameter <cl@linux-foundation.org>
151Description:
152 The file deactivate_full is read-only and specifies how many
153 times a full cpu slab was deactivated.
154 Available when CONFIG_SLUB_STATS is enabled.
155
156What: /sys/kernel/slab/cache/deactivate_remote_frees
157Date: February 2008
158KernelVersion: 2.6.25
159Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
160 Christoph Lameter <cl@linux-foundation.org>
161Description:
162 The file deactivate_remote_frees is read-only and specifies how
163 many times a cpu slab has been deactivated and contained free
164 objects that were freed remotely.
165 Available when CONFIG_SLUB_STATS is enabled.
166
167What: /sys/kernel/slab/cache/deactivate_to_head
168Date: February 2008
169KernelVersion: 2.6.25
170Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
171 Christoph Lameter <cl@linux-foundation.org>
172Description:
173 The file deactivate_to_head is read-only and specifies how
174 many times a partial cpu slab was deactivated and added to the
175 head of its node's partial list.
176 Available when CONFIG_SLUB_STATS is enabled.
177
178What: /sys/kernel/slab/cache/deactivate_to_tail
179Date: February 2008
180KernelVersion: 2.6.25
181Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
182 Christoph Lameter <cl@linux-foundation.org>
183Description:
184 The file deactivate_to_tail is read-only and specifies how
185 many times a partial cpu slab was deactivated and added to the
186 tail of its node's partial list.
187 Available when CONFIG_SLUB_STATS is enabled.
188
189What: /sys/kernel/slab/cache/destroy_by_rcu
190Date: May 2007
191KernelVersion: 2.6.22
192Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
193 Christoph Lameter <cl@linux-foundation.org>
194Description:
195 The destroy_by_rcu file is read-only and specifies whether
196 slabs (not objects) are freed by rcu.
197
198What: /sys/kernel/slab/cache/free_add_partial
199Date: February 2008
200KernelVersion: 2.6.25
201Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
202 Christoph Lameter <cl@linux-foundation.org>
203Description:
204 The file free_add_partial is read-only and specifies how many
205 times an object has been freed in a full slab so that it had to
206 added to its node's partial list.
207 Available when CONFIG_SLUB_STATS is enabled.
208
209What: /sys/kernel/slab/cache/free_calls
210Date: May 2007
211KernelVersion: 2.6.22
212Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
213 Christoph Lameter <cl@linux-foundation.org>
214Description:
215 The free_calls file is read-only and lists the locations of
216 object frees if slab debugging is enabled (see
217 Documentation/vm/slub.txt).
218
219What: /sys/kernel/slab/cache/free_fastpath
220Date: February 2008
221KernelVersion: 2.6.25
222Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
223 Christoph Lameter <cl@linux-foundation.org>
224Description:
225 The free_fastpath file is read-only and specifies how many
226 objects have been freed using the fast path because it was an
227 object from the cpu slab.
228 Available when CONFIG_SLUB_STATS is enabled.
229
230What: /sys/kernel/slab/cache/free_frozen
231Date: February 2008
232KernelVersion: 2.6.25
233Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
234 Christoph Lameter <cl@linux-foundation.org>
235Description:
236 The free_frozen file is read-only and specifies how many
237 objects have been freed to a frozen slab (i.e. a remote cpu
238 slab).
239 Available when CONFIG_SLUB_STATS is enabled.
240
241What: /sys/kernel/slab/cache/free_remove_partial
242Date: February 2008
243KernelVersion: 2.6.25
244Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
245 Christoph Lameter <cl@linux-foundation.org>
246Description:
247 The file free_remove_partial is read-only and specifies how
248 many times an object has been freed to a now-empty slab so
249 that it had to be removed from its node's partial list.
250 Available when CONFIG_SLUB_STATS is enabled.
251
252What: /sys/kernel/slab/cache/free_slab
253Date: February 2008
254KernelVersion: 2.6.25
255Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
256 Christoph Lameter <cl@linux-foundation.org>
257Description:
258 The free_slab file is read-only and specifies how many times an
259 empty slab has been freed back to the page allocator.
260 Available when CONFIG_SLUB_STATS is enabled.
261
262What: /sys/kernel/slab/cache/free_slowpath
263Date: February 2008
264KernelVersion: 2.6.25
265Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
266 Christoph Lameter <cl@linux-foundation.org>
267Description:
268 The free_slowpath file is read-only and specifies how many
269 objects have been freed using the slow path (i.e. to a full or
270 partial slab).
271 Available when CONFIG_SLUB_STATS is enabled.
272
273What: /sys/kernel/slab/cache/hwcache_align
274Date: May 2007
275KernelVersion: 2.6.22
276Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
277 Christoph Lameter <cl@linux-foundation.org>
278Description:
279 The hwcache_align file is read-only and specifies whether
280 objects are aligned on cachelines.
281
282What: /sys/kernel/slab/cache/min_partial
283Date: February 2009
284KernelVersion: 2.6.30
285Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
286 David Rientjes <rientjes@google.com>
287Description:
288 The min_partial file specifies how many empty slabs shall
289 remain on a node's partial list to avoid the overhead of
290 allocating new slabs. Such slabs may be reclaimed by utilizing
291 the shrink file.
292
293What: /sys/kernel/slab/cache/object_size
294Date: May 2007
295KernelVersion: 2.6.22
296Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
297 Christoph Lameter <cl@linux-foundation.org>
298Description:
299 The object_size file is read-only and specifies the cache's
300 object size.
301
302What: /sys/kernel/slab/cache/objects
303Date: May 2007
304KernelVersion: 2.6.22
305Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
306 Christoph Lameter <cl@linux-foundation.org>
307Description:
308 The objects file is read-only and displays how many objects are
309 active and from which nodes they are from.
310
311What: /sys/kernel/slab/cache/objects_partial
312Date: April 2008
313KernelVersion: 2.6.26
314Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
315 Christoph Lameter <cl@linux-foundation.org>
316Description:
317 The objects_partial file is read-only and displays how many
318 objects are on partial slabs and from which nodes they are
319 from.
320
321What: /sys/kernel/slab/cache/objs_per_slab
322Date: May 2007
323KernelVersion: 2.6.22
324Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
325 Christoph Lameter <cl@linux-foundation.org>
326Description:
327 The file objs_per_slab is read-only and specifies how many
328 objects may be allocated from a single slab of the order
329 specified in /sys/kernel/slab/cache/order.
330
331What: /sys/kernel/slab/cache/order
332Date: May 2007
333KernelVersion: 2.6.22
334Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
335 Christoph Lameter <cl@linux-foundation.org>
336Description:
337 The order file specifies the page order at which new slabs are
338 allocated. It is writable and can be changed to increase the
339 number of objects per slab. If a slab cannot be allocated
340 because of fragmentation, SLUB will retry with the minimum order
341 possible depending on its characteristics.
342
343What: /sys/kernel/slab/cache/order_fallback
344Date: April 2008
345KernelVersion: 2.6.26
346Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
347 Christoph Lameter <cl@linux-foundation.org>
348Description:
349 The file order_fallback is read-only and specifies how many
350 times an allocation of a new slab has not been possible at the
351 cache's order and instead fallen back to its minimum possible
352 order.
353 Available when CONFIG_SLUB_STATS is enabled.
354
355What: /sys/kernel/slab/cache/partial
356Date: May 2007
357KernelVersion: 2.6.22
358Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
359 Christoph Lameter <cl@linux-foundation.org>
360Description:
361 The partial file is read-only and displays how long many
362 partial slabs there are and how long each node's list is.
363
364What: /sys/kernel/slab/cache/poison
365Date: May 2007
366KernelVersion: 2.6.22
367Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
368 Christoph Lameter <cl@linux-foundation.org>
369Description:
370 The poison file specifies whether objects should be poisoned
371 when a new slab is allocated.
372
373What: /sys/kernel/slab/cache/reclaim_account
374Date: May 2007
375KernelVersion: 2.6.22
376Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
377 Christoph Lameter <cl@linux-foundation.org>
378Description:
379 The reclaim_account file specifies whether the cache's objects
380 are reclaimable (and grouped by their mobility).
381
382What: /sys/kernel/slab/cache/red_zone
383Date: May 2007
384KernelVersion: 2.6.22
385Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
386 Christoph Lameter <cl@linux-foundation.org>
387Description:
388 The red_zone file specifies whether the cache's objects are red
389 zoned.
390
391What: /sys/kernel/slab/cache/remote_node_defrag_ratio
392Date: January 2008
393KernelVersion: 2.6.25
394Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
395 Christoph Lameter <cl@linux-foundation.org>
396Description:
397 The file remote_node_defrag_ratio specifies the percentage of
398 times SLUB will attempt to refill the cpu slab with a partial
399 slab from a remote node as opposed to allocating a new slab on
400 the local node. This reduces the amount of wasted memory over
401 the entire system but can be expensive.
402 Available when CONFIG_NUMA is enabled.
403
404What: /sys/kernel/slab/cache/sanity_checks
405Date: May 2007
406KernelVersion: 2.6.22
407Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
408 Christoph Lameter <cl@linux-foundation.org>
409Description:
410 The sanity_checks file specifies whether expensive checks
411 should be performed on free and, at minimum, enables double free
412 checks. Caches that enable sanity_checks cannot be merged with
413 caches that do not.
414
415What: /sys/kernel/slab/cache/shrink
416Date: May 2007
417KernelVersion: 2.6.22
418Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
419 Christoph Lameter <cl@linux-foundation.org>
420Description:
421 The shrink file is written when memory should be reclaimed from
422 a cache. Empty partial slabs are freed and the partial list is
423 sorted so the slabs with the fewest available objects are used
424 first.
425
426What: /sys/kernel/slab/cache/slab_size
427Date: May 2007
428KernelVersion: 2.6.22
429Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
430 Christoph Lameter <cl@linux-foundation.org>
431Description:
432 The slab_size file is read-only and specifies the object size
433 with metadata (debugging information and alignment) in bytes.
434
435What: /sys/kernel/slab/cache/slabs
436Date: May 2007
437KernelVersion: 2.6.22
438Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
439 Christoph Lameter <cl@linux-foundation.org>
440Description:
441 The slabs file is read-only and displays how long many slabs
442 there are (both cpu and partial) and from which nodes they are
443 from.
444
445What: /sys/kernel/slab/cache/store_user
446Date: May 2007
447KernelVersion: 2.6.22
448Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
449 Christoph Lameter <cl@linux-foundation.org>
450Description:
451 The store_user file specifies whether the location of
452 allocation or free should be tracked for a cache.
453
454What: /sys/kernel/slab/cache/total_objects
455Date: April 2008
456KernelVersion: 2.6.26
457Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
458 Christoph Lameter <cl@linux-foundation.org>
459Description:
460 The total_objects file is read-only and displays how many total
461 objects a cache has and from which nodes they are from.
462
463What: /sys/kernel/slab/cache/trace
464Date: May 2007
465KernelVersion: 2.6.22
466Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
467 Christoph Lameter <cl@linux-foundation.org>
468Description:
469 The trace file specifies whether object allocations and frees
470 should be traced.
471
472What: /sys/kernel/slab/cache/validate
473Date: May 2007
474KernelVersion: 2.6.22
475Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
476 Christoph Lameter <cl@linux-foundation.org>
477Description:
478 Writing to the validate file causes SLUB to traverse all of its
479 cache's objects and check the validity of metadata.
diff --git a/Documentation/ABI/testing/sysfs-pps b/Documentation/ABI/testing/sysfs-pps
new file mode 100644
index 000000000000..25028c7bc37d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps
@@ -0,0 +1,73 @@
1What: /sys/class/pps/
2Date: February 2008
3Contact: Rodolfo Giometti <giometti@linux.it>
4Description:
5 The /sys/class/pps/ directory will contain files and
6 directories that will provide a unified interface to
7 the PPS sources.
8
9What: /sys/class/pps/ppsX/
10Date: February 2008
11Contact: Rodolfo Giometti <giometti@linux.it>
12Description:
13 The /sys/class/pps/ppsX/ directory is related to X-th
14 PPS source into the system. Each directory will
15 contain files to manage and control its PPS source.
16
17What: /sys/class/pps/ppsX/assert
18Date: February 2008
19Contact: Rodolfo Giometti <giometti@linux.it>
20Description:
21 The /sys/class/pps/ppsX/assert file reports the assert events
22 and the assert sequence number of the X-th source in the form:
23
24 <secs>.<nsec>#<sequence>
25
26 If the source has no assert events the content of this file
27 is empty.
28
29What: /sys/class/pps/ppsX/clear
30Date: February 2008
31Contact: Rodolfo Giometti <giometti@linux.it>
32Description:
33 The /sys/class/pps/ppsX/clear file reports the clear events
34 and the clear sequence number of the X-th source in the form:
35
36 <secs>.<nsec>#<sequence>
37
38 If the source has no clear events the content of this file
39 is empty.
40
41What: /sys/class/pps/ppsX/mode
42Date: February 2008
43Contact: Rodolfo Giometti <giometti@linux.it>
44Description:
45 The /sys/class/pps/ppsX/mode file reports the functioning
46 mode of the X-th source in hexadecimal encoding.
47
48 Please, refer to linux/include/linux/pps.h for further
49 info.
50
51What: /sys/class/pps/ppsX/echo
52Date: February 2008
53Contact: Rodolfo Giometti <giometti@linux.it>
54Description:
55 The /sys/class/pps/ppsX/echo file reports if the X-th does
56 or does not support an "echo" function.
57
58What: /sys/class/pps/ppsX/name
59Date: February 2008
60Contact: Rodolfo Giometti <giometti@linux.it>
61Description:
62 The /sys/class/pps/ppsX/name file reports the name of the
63 X-th source.
64
65What: /sys/class/pps/ppsX/path
66Date: February 2008
67Contact: Rodolfo Giometti <giometti@linux.it>
68Description:
69 The /sys/class/pps/ppsX/path file reports the path name of
70 the device connected with the X-th source.
71
72 If the source is not connected with any device the content
73 of this file is empty.
diff --git a/Documentation/Changes b/Documentation/Changes
index b95082be4d5e..6d0f1efc5bf6 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -29,7 +29,7 @@ hardware, for example, you probably needn't concern yourself with
29isdn4k-utils. 29isdn4k-utils.
30 30
31o Gnu C 3.2 # gcc --version 31o Gnu C 3.2 # gcc --version
32o Gnu make 3.79.1 # make --version 32o Gnu make 3.80 # make --version
33o binutils 2.12 # ld -v 33o binutils 2.12 # ld -v
34o util-linux 2.10o # fdformat --version 34o util-linux 2.10o # fdformat --version
35o module-init-tools 0.9.10 # depmod -V 35o module-init-tools 0.9.10 # depmod -V
@@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version
48o oprofile 0.9 # oprofiled --version 48o oprofile 0.9 # oprofiled --version
49o udev 081 # udevinfo -V 49o udev 081 # udevinfo -V
50o grub 0.93 # grub --version 50o grub 0.93 # grub --version
51o mcelog 0.6
51 52
52Kernel compilation 53Kernel compilation
53================== 54==================
@@ -61,7 +62,7 @@ computer.
61Make 62Make
62---- 63----
63 64
64You will need Gnu make 3.79.1 or later to build the kernel. 65You will need Gnu make 3.80 or later to build the kernel.
65 66
66Binutils 67Binutils
67-------- 68--------
@@ -71,6 +72,13 @@ assembling the 16-bit boot code, removing the need for as86 to compile
71your kernel. This change does, however, mean that you need a recent 72your kernel. This change does, however, mean that you need a recent
72release of binutils. 73release of binutils.
73 74
75Perl
76----
77
78You will need perl 5 and the following modules: Getopt::Long, Getopt::Std,
79File::Basename, and File::Find to build the kernel.
80
81
74System utilities 82System utilities
75================ 83================
76 84
@@ -276,6 +284,16 @@ before running exportfs or mountd. It is recommended that all NFS
276services be protected from the internet-at-large by a firewall where 284services be protected from the internet-at-large by a firewall where
277that is possible. 285that is possible.
278 286
287mcelog
288------
289
290In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
291as a regular cronjob similar to the x86-64 kernel to process and log
292machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
293events are errors reported by the CPU. Processing them is strongly encouraged.
294All x86-64 kernels since 2.6.4 require the mcelog utility to
295process machine checks.
296
279Getting updated software 297Getting updated software
280======================== 298========================
281 299
@@ -365,6 +383,10 @@ FUSE
365---- 383----
366o <http://sourceforge.net/projects/fuse> 384o <http://sourceforge.net/projects/fuse>
367 385
386mcelog
387------
388o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
389
368Networking 390Networking
369********** 391**********
370 392
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 72968cd5eaf3..8bb37237ebd2 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -698,8 +698,8 @@ very often is not. Abundant use of the inline keyword leads to a much bigger
698kernel, which in turn slows the system as a whole down, due to a bigger 698kernel, which in turn slows the system as a whole down, due to a bigger
699icache footprint for the CPU and simply because there is less memory 699icache footprint for the CPU and simply because there is less memory
700available for the pagecache. Just think about it; a pagecache miss causes a 700available for the pagecache. Just think about it; a pagecache miss causes a
701disk seek, which easily takes 5 miliseconds. There are a LOT of cpu cycles 701disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
702that can go into these 5 miliseconds. 702that can go into these 5 milliseconds.
703 703
704A reasonable rule of thumb is to not put inline at functions that have more 704A reasonable rule of thumb is to not put inline at functions that have more
705than 3 lines of code in them. An exception to this rule are the cases where 705than 3 lines of code in them. An exception to this rule are the cases where
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 2a3fcc55e981..5aceb88b3f8b 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -609,3 +609,121 @@ size is the size (and should be a page-sized multiple).
609The return value will be either a pointer to the processor virtual 609The return value will be either a pointer to the processor virtual
610address of the memory, or an error (via PTR_ERR()) if any part of the 610address of the memory, or an error (via PTR_ERR()) if any part of the
611region is occupied. 611region is occupied.
612
613Part III - Debug drivers use of the DMA-API
614-------------------------------------------
615
616The DMA-API as described above as some constraints. DMA addresses must be
617released with the corresponding function with the same size for example. With
618the advent of hardware IOMMUs it becomes more and more important that drivers
619do not violate those constraints. In the worst case such a violation can
620result in data corruption up to destroyed filesystems.
621
622To debug drivers and find bugs in the usage of the DMA-API checking code can
623be compiled into the kernel which will tell the developer about those
624violations. If your architecture supports it you can select the "Enable
625debugging of DMA-API usage" option in your kernel configuration. Enabling this
626option has a performance impact. Do not enable it in production kernels.
627
628If you boot the resulting kernel will contain code which does some bookkeeping
629about what DMA memory was allocated for which device. If this code detects an
630error it prints a warning message with some details into your kernel log. An
631example warning message may look like this:
632
633------------[ cut here ]------------
634WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
635 check_unmap+0x203/0x490()
636Hardware name:
637forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
638 function [device address=0x00000000640444be] [size=66 bytes] [mapped as
639single] [unmapped as page]
640Modules linked in: nfsd exportfs bridge stp llc r8169
641Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1
642Call Trace:
643 <IRQ> [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
644 [<ffffffff80647b70>] _spin_unlock+0x10/0x30
645 [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
646 [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
647 [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
648 [<ffffffff80252f96>] queue_work+0x56/0x60
649 [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
650 [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
651 [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
652 [<ffffffff80235177>] find_busiest_group+0x207/0x8a0
653 [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
654 [<ffffffff803c7ea3>] check_unmap+0x203/0x490
655 [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
656 [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
657 [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
658 [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
659 [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
660 [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
661 [<ffffffff8020c093>] ret_from_intr+0x0/0xa
662 <EOI> <4>---[ end trace f6435a98e2a38c0e ]---
663
664The driver developer can find the driver and the device including a stacktrace
665of the DMA-API call which caused this warning.
666
667Per default only the first error will result in a warning message. All other
668errors will only silently counted. This limitation exist to prevent the code
669from flooding your kernel log. To support debugging a device driver this can
670be disabled via debugfs. See the debugfs interface documentation below for
671details.
672
673The debugfs directory for the DMA-API debugging code is called dma-api/. In
674this directory the following files can currently be found:
675
676 dma-api/all_errors This file contains a numeric value. If this
677 value is not equal to zero the debugging code
678 will print a warning for every error it finds
679 into the kernel log. Be careful with this
680 option, as it can easily flood your logs.
681
682 dma-api/disabled This read-only file contains the character 'Y'
683 if the debugging code is disabled. This can
684 happen when it runs out of memory or if it was
685 disabled at boot time
686
687 dma-api/error_count This file is read-only and shows the total
688 numbers of errors found.
689
690 dma-api/num_errors The number in this file shows how many
691 warnings will be printed to the kernel log
692 before it stops. This number is initialized to
693 one at system boot and be set by writing into
694 this file
695
696 dma-api/min_free_entries
697 This read-only file can be read to get the
698 minimum number of free dma_debug_entries the
699 allocator has ever seen. If this value goes
700 down to zero the code will disable itself
701 because it is not longer reliable.
702
703 dma-api/num_free_entries
704 The current number of free dma_debug_entries
705 in the allocator.
706
707 dma-api/driver-filter
708 You can write a name of a driver into this file
709 to limit the debug output to requests from that
710 particular driver. Write an empty string to
711 that file to disable the filter and see
712 all errors again.
713
714If you have this code compiled into your kernel it will be enabled by default.
715If you want to boot without the bookkeeping anyway you can provide
716'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
717Notice that you can not enable it again at runtime. You have to reboot to do
718so.
719
720If you want to see debug messages only for a special device driver you can
721specify the dma_debug_driver=<drivername> parameter. This will enable the
722driver filter at boot time. The debug code will only print errors for that
723driver afterwards. This filter can be disabled or changed later using debugfs.
724
725When the code disables itself at runtime this is most likely because it ran
726out of dma_debug_entries. These entries are preallocated at boot. The number
727of preallocated entries is defined per architecture. If it is too low for you
728boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
729architectural default.
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index b2a4d6d244d9..01f24e94bdb6 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -136,7 +136,7 @@ exactly why.
136The standard 32-bit addressing PCI device would do something like 136The standard 32-bit addressing PCI device would do something like
137this: 137this:
138 138
139 if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { 139 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
140 printk(KERN_WARNING 140 printk(KERN_WARNING
141 "mydev: No suitable DMA available.\n"); 141 "mydev: No suitable DMA available.\n");
142 goto ignore_this_device; 142 goto ignore_this_device;
@@ -155,9 +155,9 @@ all 64-bits when accessing streaming DMA:
155 155
156 int using_dac; 156 int using_dac;
157 157
158 if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { 158 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
159 using_dac = 1; 159 using_dac = 1;
160 } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { 160 } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
161 using_dac = 0; 161 using_dac = 0;
162 } else { 162 } else {
163 printk(KERN_WARNING 163 printk(KERN_WARNING
@@ -170,14 +170,14 @@ the case would look like this:
170 170
171 int using_dac, consistent_using_dac; 171 int using_dac, consistent_using_dac;
172 172
173 if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { 173 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
174 using_dac = 1; 174 using_dac = 1;
175 consistent_using_dac = 1; 175 consistent_using_dac = 1;
176 pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); 176 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
177 } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { 177 } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
178 using_dac = 0; 178 using_dac = 0;
179 consistent_using_dac = 0; 179 consistent_using_dac = 0;
180 pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); 180 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
181 } else { 181 } else {
182 printk(KERN_WARNING 182 printk(KERN_WARNING
183 "mydev: No suitable DMA available.\n"); 183 "mydev: No suitable DMA available.\n");
@@ -192,7 +192,7 @@ check the return value from pci_set_consistent_dma_mask().
192Finally, if your device can only drive the low 24-bits of 192Finally, if your device can only drive the low 24-bits of
193address during PCI bus mastering you might do something like: 193address during PCI bus mastering you might do something like:
194 194
195 if (pci_set_dma_mask(pdev, DMA_24BIT_MASK)) { 195 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) {
196 printk(KERN_WARNING 196 printk(KERN_WARNING
197 "mydev: 24-bit DMA addressing not available.\n"); 197 "mydev: 24-bit DMA addressing not available.\n");
198 goto ignore_this_device; 198 goto ignore_this_device;
@@ -213,7 +213,7 @@ most specific mask.
213 213
214Here is pseudo-code showing how this might be done: 214Here is pseudo-code showing how this might be done:
215 215
216 #define PLAYBACK_ADDRESS_BITS DMA_32BIT_MASK 216 #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32)
217 #define RECORD_ADDRESS_BITS 0x00ffffff 217 #define RECORD_ADDRESS_BITS 0x00ffffff
218 218
219 struct my_sound_card *card; 219 struct my_sound_card *card;
diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore
index c102c02ecf89..c6def352fe39 100644
--- a/Documentation/DocBook/.gitignore
+++ b/Documentation/DocBook/.gitignore
@@ -4,3 +4,7 @@
4*.html 4*.html
5*.9.gz 5*.9.gz
6*.9 6*.9
7*.aux
8*.dvi
9*.log
10*.out
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 1462ed86d40a..9632444f6c62 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -12,7 +12,9 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
12 kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ 12 kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ 14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
15 mac80211.xml debugobjects.xml sh.xml regulator.xml 15 mac80211.xml debugobjects.xml sh.xml regulator.xml \
16 alsa-driver-api.xml writing-an-alsa-driver.xml \
17 tracepoint.xml
16 18
17### 19###
18# The build process is as follows (targets): 20# The build process is as follows (targets):
@@ -30,7 +32,7 @@ PS_METHOD = $(prefer-db2x)
30 32
31### 33###
32# The targets that may be used. 34# The targets that may be used.
33PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs 35PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs cleandocs
34 36
35BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) 37BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
36xmldocs: $(BOOKS) 38xmldocs: $(BOOKS)
@@ -142,7 +144,8 @@ quiet_cmd_db2pdf = PDF $@
142 $(call cmd,db2pdf) 144 $(call cmd,db2pdf)
143 145
144 146
145main_idx = Documentation/DocBook/index.html 147index = index.html
148main_idx = Documentation/DocBook/$(index)
146build_main_index = rm -rf $(main_idx) && \ 149build_main_index = rm -rf $(main_idx) && \
147 echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \ 150 echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \
148 echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \ 151 echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \
@@ -212,11 +215,12 @@ silent_gen_xml = :
212dochelp: 215dochelp:
213 @echo ' Linux kernel internal documentation in different formats:' 216 @echo ' Linux kernel internal documentation in different formats:'
214 @echo ' htmldocs - HTML' 217 @echo ' htmldocs - HTML'
215 @echo ' installmandocs - install man pages generated by mandocs'
216 @echo ' mandocs - man pages'
217 @echo ' pdfdocs - PDF' 218 @echo ' pdfdocs - PDF'
218 @echo ' psdocs - Postscript' 219 @echo ' psdocs - Postscript'
219 @echo ' xmldocs - XML DocBook' 220 @echo ' xmldocs - XML DocBook'
221 @echo ' mandocs - man pages'
222 @echo ' installmandocs - install man pages generated by mandocs'
223 @echo ' cleandocs - clean all generated DocBook files'
220 224
221### 225###
222# Temporary files left by various tools 226# Temporary files left by various tools
@@ -230,10 +234,14 @@ clean-files := $(DOCBOOKS) \
230 $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ 234 $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \
231 $(patsubst %.xml, %.html, $(DOCBOOKS)) \ 235 $(patsubst %.xml, %.html, $(DOCBOOKS)) \
232 $(patsubst %.xml, %.9, $(DOCBOOKS)) \ 236 $(patsubst %.xml, %.9, $(DOCBOOKS)) \
233 $(C-procfs-example) 237 $(C-procfs-example) $(index)
234 238
235clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man 239clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
236 240
241cleandocs:
242 $(Q)rm -f $(call objectify, $(clean-files))
243 $(Q)rm -rf $(call objectify, $(clean-dirs))
244
237# Declare the contents of the .PHONY variable as phony. We keep that 245# Declare the contents of the .PHONY variable as phony. We keep that
238# information in a variable se we can use it in if_changed and friends. 246# information in a variable se we can use it in if_changed and friends.
239 247
diff --git a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl b/Documentation/DocBook/alsa-driver-api.tmpl
index 9d644f7e241e..0230a96f0564 100644
--- a/Documentation/sound/alsa/DocBook/alsa-driver-api.tmpl
+++ b/Documentation/DocBook/alsa-driver-api.tmpl
@@ -1,11 +1,11 @@
1<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN"> 1<?xml version="1.0" encoding="UTF-8"?>
2 2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3<book> 3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4<?dbhtml filename="index.html">
5 4
6<!-- ****************************************************** --> 5<!-- ****************************************************** -->
7<!-- Header --> 6<!-- Header -->
8<!-- ****************************************************** --> 7<!-- ****************************************************** -->
8<book id="ALSA-Driver-API">
9 <bookinfo> 9 <bookinfo>
10 <title>The ALSA Driver API</title> 10 <title>The ALSA Driver API</title>
11 11
@@ -35,6 +35,8 @@
35 35
36 </bookinfo> 36 </bookinfo>
37 37
38<toc></toc>
39
38 <chapter><title>Management of Cards and Devices</title> 40 <chapter><title>Management of Cards and Devices</title>
39 <sect1><title>Card Management</title> 41 <sect1><title>Card Management</title>
40!Esound/core/init.c 42!Esound/core/init.c
@@ -71,6 +73,10 @@
71!Esound/pci/ac97/ac97_codec.c 73!Esound/pci/ac97/ac97_codec.c
72!Esound/pci/ac97/ac97_pcm.c 74!Esound/pci/ac97/ac97_pcm.c
73 </sect1> 75 </sect1>
76 <sect1><title>Virtual Master Control API</title>
77!Esound/core/vmaster.c
78!Iinclude/sound/control.h
79 </sect1>
74 </chapter> 80 </chapter>
75 <chapter><title>MIDI API</title> 81 <chapter><title>MIDI API</title>
76 <sect1><title>Raw MIDI API</title> 82 <sect1><title>Raw MIDI API</title>
@@ -89,6 +95,9 @@
89 <sect1><title>Hardware-Dependent Devices API</title> 95 <sect1><title>Hardware-Dependent Devices API</title>
90!Esound/core/hwdep.c 96!Esound/core/hwdep.c
91 </sect1> 97 </sect1>
98 <sect1><title>Jack Abstraction Layer API</title>
99!Esound/core/jack.c
100 </sect1>
92 <sect1><title>ISA DMA Helpers</title> 101 <sect1><title>ISA DMA Helpers</title>
93!Esound/core/isadma.c 102!Esound/core/isadma.c
94 </sect1> 103 </sect1>
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 7f5f218015fe..08ff908aa7a2 100644
--- a/Documentation/DocBook/debugobjects.tmpl
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -106,7 +106,7 @@
106 number of errors are printk'ed including a full stack trace. 106 number of errors are printk'ed including a full stack trace.
107 </para> 107 </para>
108 <para> 108 <para>
109 The statistics are available via debugfs/debug_objects/stats. 109 The statistics are available via /sys/kernel/debug/debug_objects/stats.
110 They provide information about the number of warnings and the 110 They provide information about the number of warnings and the
111 number of successful fixups along with information about the 111 number of successful fixups along with information about the
112 usage of the internal tracking objects and the state of the 112 usage of the internal tracking objects and the state of the
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
index 3a882d9a90a9..c671a0168096 100644
--- a/Documentation/DocBook/genericirq.tmpl
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -440,6 +440,7 @@ desc->chip->end();
440 used in the generic IRQ layer. 440 used in the generic IRQ layer.
441 </para> 441 </para>
442!Iinclude/linux/irq.h 442!Iinclude/linux/irq.h
443!Iinclude/linux/interrupt.h
443 </chapter> 444 </chapter>
444 445
445 <chapter id="pubfunctions"> 446 <chapter id="pubfunctions">
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index bc962cda6504..44b3def961a2 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -190,15 +190,20 @@ X!Ekernel/module.c
190!Edrivers/pci/pci.c 190!Edrivers/pci/pci.c
191!Edrivers/pci/pci-driver.c 191!Edrivers/pci/pci-driver.c
192!Edrivers/pci/remove.c 192!Edrivers/pci/remove.c
193!Edrivers/pci/pci-acpi.c
194!Edrivers/pci/search.c 193!Edrivers/pci/search.c
195!Edrivers/pci/msi.c 194!Edrivers/pci/msi.c
196!Edrivers/pci/bus.c 195!Edrivers/pci/bus.c
196!Edrivers/pci/access.c
197!Edrivers/pci/irq.c
198!Edrivers/pci/htirq.c
197<!-- FIXME: Removed for now since no structured comments in source 199<!-- FIXME: Removed for now since no structured comments in source
198X!Edrivers/pci/hotplug.c 200X!Edrivers/pci/hotplug.c
199--> 201-->
200!Edrivers/pci/probe.c 202!Edrivers/pci/probe.c
203!Edrivers/pci/slot.c
201!Edrivers/pci/rom.c 204!Edrivers/pci/rom.c
205!Edrivers/pci/iov.c
206!Idrivers/pci/pci-sysfs.c
202 </sect1> 207 </sect1>
203 <sect1><title>PCI Hotplug Support Library</title> 208 <sect1><title>PCI Hotplug Support Library</title>
204!Edrivers/pci/hotplug/pci_hotplug_core.c 209!Edrivers/pci/hotplug/pci_hotplug_core.c
@@ -258,7 +263,7 @@ X!Earch/x86/kernel/mca_32.c
258!Eblock/blk-tag.c 263!Eblock/blk-tag.c
259!Iblock/blk-tag.c 264!Iblock/blk-tag.c
260!Eblock/blk-integrity.c 265!Eblock/blk-integrity.c
261!Iblock/blktrace.c 266!Ikernel/trace/blktrace.c
262!Iblock/genhd.c 267!Iblock/genhd.c
263!Eblock/genhd.c 268!Eblock/genhd.c
264 </chapter> 269 </chapter>
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 372dec20c8da..5cff41a5fa7c 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -281,7 +281,7 @@
281 seriously wrong while debugging, it will most often be the case 281 seriously wrong while debugging, it will most often be the case
282 that you want to enable gdb to be verbose about its target 282 that you want to enable gdb to be verbose about its target
283 communications. You do this prior to issuing the <constant>target 283 communications. You do this prior to issuing the <constant>target
284 remote</constant> command by typing in: <constant>set remote debug 1</constant> 284 remote</constant> command by typing in: <constant>set debug remote 1</constant>
285 </para> 285 </para>
286 </chapter> 286 </chapter>
287 <chapter id="KGDBTestSuite"> 287 <chapter id="KGDBTestSuite">
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl
index 77c3c202991b..e36986663570 100644
--- a/Documentation/DocBook/mac80211.tmpl
+++ b/Documentation/DocBook/mac80211.tmpl
@@ -17,8 +17,7 @@
17 </authorgroup> 17 </authorgroup>
18 18
19 <copyright> 19 <copyright>
20 <year>2007</year> 20 <year>2007-2009</year>
21 <year>2008</year>
22 <holder>Johannes Berg</holder> 21 <holder>Johannes Berg</holder>
23 </copyright> 22 </copyright>
24 23
@@ -146,7 +145,6 @@ usage should require reading the full document.
146 interface in STA mode at first! 145 interface in STA mode at first!
147 </para> 146 </para>
148!Finclude/net/mac80211.h ieee80211_if_init_conf 147!Finclude/net/mac80211.h ieee80211_if_init_conf
149!Finclude/net/mac80211.h ieee80211_if_conf
150 </chapter> 148 </chapter>
151 149
152 <chapter id="rx-tx"> 150 <chapter id="rx-tx">
@@ -165,8 +163,8 @@ usage should require reading the full document.
165!Pinclude/net/mac80211.h Frame format 163!Pinclude/net/mac80211.h Frame format
166 </sect1> 164 </sect1>
167 <sect1> 165 <sect1>
168 <title>Alignment issues</title> 166 <title>Packet alignment</title>
169 <para>TBD</para> 167!Pnet/mac80211/rx.c Packet alignment
170 </sect1> 168 </sect1>
171 <sect1> 169 <sect1>
172 <title>Calling into mac80211 from interrupts</title> 170 <title>Calling into mac80211 from interrupts</title>
@@ -223,6 +221,17 @@ usage should require reading the full document.
223!Finclude/net/mac80211.h ieee80211_key_flags 221!Finclude/net/mac80211.h ieee80211_key_flags
224 </chapter> 222 </chapter>
225 223
224 <chapter id="powersave">
225 <title>Powersave support</title>
226!Pinclude/net/mac80211.h Powersave support
227 </chapter>
228
229 <chapter id="beacon-filter">
230 <title>Beacon filter support</title>
231!Pinclude/net/mac80211.h Beacon filter support
232!Finclude/net/mac80211.h ieee80211_beacon_loss
233 </chapter>
234
226 <chapter id="qos"> 235 <chapter id="qos">
227 <title>Multiple queues and QoS support</title> 236 <title>Multiple queues and QoS support</title>
228 <para>TBD</para> 237 <para>TBD</para>
diff --git a/Documentation/DocBook/procfs_example.c b/Documentation/DocBook/procfs_example.c
index 8c6396e4bf31..a5b11793b1e0 100644
--- a/Documentation/DocBook/procfs_example.c
+++ b/Documentation/DocBook/procfs_example.c
@@ -117,9 +117,6 @@ static int __init init_procfs_example(void)
117 rv = -ENOMEM; 117 rv = -ENOMEM;
118 goto out; 118 goto out;
119 } 119 }
120
121 example_dir->owner = THIS_MODULE;
122
123 /* create jiffies using convenience function */ 120 /* create jiffies using convenience function */
124 jiffies_file = create_proc_read_entry("jiffies", 121 jiffies_file = create_proc_read_entry("jiffies",
125 0444, example_dir, 122 0444, example_dir,
@@ -130,8 +127,6 @@ static int __init init_procfs_example(void)
130 goto no_jiffies; 127 goto no_jiffies;
131 } 128 }
132 129
133 jiffies_file->owner = THIS_MODULE;
134
135 /* create foo and bar files using same callback 130 /* create foo and bar files using same callback
136 * functions 131 * functions
137 */ 132 */
@@ -146,7 +141,6 @@ static int __init init_procfs_example(void)
146 foo_file->data = &foo_data; 141 foo_file->data = &foo_data;
147 foo_file->read_proc = proc_read_foobar; 142 foo_file->read_proc = proc_read_foobar;
148 foo_file->write_proc = proc_write_foobar; 143 foo_file->write_proc = proc_write_foobar;
149 foo_file->owner = THIS_MODULE;
150 144
151 bar_file = create_proc_entry("bar", 0644, example_dir); 145 bar_file = create_proc_entry("bar", 0644, example_dir);
152 if(bar_file == NULL) { 146 if(bar_file == NULL) {
@@ -159,7 +153,6 @@ static int __init init_procfs_example(void)
159 bar_file->data = &bar_data; 153 bar_file->data = &bar_data;
160 bar_file->read_proc = proc_read_foobar; 154 bar_file->read_proc = proc_read_foobar;
161 bar_file->write_proc = proc_write_foobar; 155 bar_file->write_proc = proc_write_foobar;
162 bar_file->owner = THIS_MODULE;
163 156
164 /* create symlink */ 157 /* create symlink */
165 symlink = proc_symlink("jiffies_too", example_dir, 158 symlink = proc_symlink("jiffies_too", example_dir,
@@ -169,8 +162,6 @@ static int __init init_procfs_example(void)
169 goto no_symlink; 162 goto no_symlink;
170 } 163 }
171 164
172 symlink->owner = THIS_MODULE;
173
174 /* everything OK */ 165 /* everything OK */
175 printk(KERN_INFO "%s %s initialised\n", 166 printk(KERN_INFO "%s %s initialised\n",
176 MODULE_NAME, MODULE_VERS); 167 MODULE_NAME, MODULE_VERS);
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 000000000000..b0756d0fd579
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="Tracepoints">
6 <bookinfo>
7 <title>The Linux Kernel Tracepoint API</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Jason</firstname>
12 <surname>Baron</surname>
13 <affiliation>
14 <address>
15 <email>jbaron@redhat.com</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <legalnotice>
22 <para>
23 This documentation is free software; you can redistribute
24 it and/or modify it under the terms of the GNU General Public
25 License as published by the Free Software Foundation; either
26 version 2 of the License, or (at your option) any later
27 version.
28 </para>
29
30 <para>
31 This program is distributed in the hope that it will be
32 useful, but WITHOUT ANY WARRANTY; without even the implied
33 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
34 See the GNU General Public License for more details.
35 </para>
36
37 <para>
38 You should have received a copy of the GNU General Public
39 License along with this program; if not, write to the Free
40 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
41 MA 02111-1307 USA
42 </para>
43
44 <para>
45 For more details see the file COPYING in the source
46 distribution of Linux.
47 </para>
48 </legalnotice>
49 </bookinfo>
50
51 <toc></toc>
52 <chapter id="intro">
53 <title>Introduction</title>
54 <para>
55 Tracepoints are static probe points that are located in strategic points
56 throughout the kernel. 'Probes' register/unregister with tracepoints
57 via a callback mechanism. The 'probes' are strictly typed functions that
58 are passed a unique set of parameters defined by each tracepoint.
59 </para>
60
61 <para>
62 From this simple callback mechanism, 'probes' can be used to profile, debug,
63 and understand kernel behavior. There are a number of tools that provide a
64 framework for using 'probes'. These tools include Systemtap, ftrace, and
65 LTTng.
66 </para>
67
68 <para>
69 Tracepoints are defined in a number of header files via various macros. Thus,
70 the purpose of this document is to provide a clear accounting of the available
71 tracepoints. The intention is to understand not only what tracepoints are
72 available but also to understand where future tracepoints might be added.
73 </para>
74
75 <para>
76 The API presented has functions of the form:
77 <function>trace_tracepointname(function parameters)</function>. These are the
78 tracepoints callbacks that are found throughout the code. Registering and
79 unregistering probes with these callback sites is covered in the
80 <filename>Documentation/trace/*</filename> directory.
81 </para>
82 </chapter>
83
84 <chapter id="irq">
85 <title>IRQ</title>
86!Iinclude/trace/events/irq.h
87 </chapter>
88
89</book>
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index 52e1b79ce0e6..8f6e3b2403c7 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -42,6 +42,13 @@ GPL version 2.
42 42
43<revhistory> 43<revhistory>
44 <revision> 44 <revision>
45 <revnumber>0.8</revnumber>
46 <date>2008-12-24</date>
47 <authorinitials>hjk</authorinitials>
48 <revremark>Added name attributes in mem and portio sysfs directories.
49 </revremark>
50 </revision>
51 <revision>
45 <revnumber>0.7</revnumber> 52 <revnumber>0.7</revnumber>
46 <date>2008-12-23</date> 53 <date>2008-12-23</date>
47 <authorinitials>hjk</authorinitials> 54 <authorinitials>hjk</authorinitials>
@@ -303,12 +310,19 @@ interested in translating it, please email me
303 appear if the size of the mapping is not 0. 310 appear if the size of the mapping is not 0.
304</para> 311</para>
305<para> 312<para>
306 Each <filename>mapX/</filename> directory contains two read-only files 313 Each <filename>mapX/</filename> directory contains four read-only files
307 that show start address and size of the memory: 314 that show attributes of the memory:
308</para> 315</para>
309<itemizedlist> 316<itemizedlist>
310<listitem> 317<listitem>
311 <para> 318 <para>
319 <filename>name</filename>: A string identifier for this mapping. This
320 is optional, the string can be empty. Drivers can set this to make it
321 easier for userspace to find the correct mapping.
322 </para>
323</listitem>
324<listitem>
325 <para>
312 <filename>addr</filename>: The address of memory that can be mapped. 326 <filename>addr</filename>: The address of memory that can be mapped.
313 </para> 327 </para>
314</listitem> 328</listitem>
@@ -366,12 +380,19 @@ offset = N * getpagesize();
366 <filename>/sys/class/uio/uioX/portio/</filename>. 380 <filename>/sys/class/uio/uioX/portio/</filename>.
367</para> 381</para>
368<para> 382<para>
369 Each <filename>portX/</filename> directory contains three read-only 383 Each <filename>portX/</filename> directory contains four read-only
370 files that show start, size, and type of the port region: 384 files that show name, start, size, and type of the port region:
371</para> 385</para>
372<itemizedlist> 386<itemizedlist>
373<listitem> 387<listitem>
374 <para> 388 <para>
389 <filename>name</filename>: A string identifier for this port region.
390 The string is optional and can be empty. Drivers can set it to make it
391 easier for userspace to find a certain port region.
392 </para>
393</listitem>
394<listitem>
395 <para>
375 <filename>start</filename>: The first port of this region. 396 <filename>start</filename>: The first port of this region.
376 </para> 397 </para>
377</listitem> 398</listitem>
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
index 87a7c07ab658..7a2e0e98986a 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -1,11 +1,11 @@
1<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN"> 1<?xml version="1.0" encoding="UTF-8"?>
2 2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3<book> 3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4<?dbhtml filename="index.html">
5 4
6<!-- ****************************************************** --> 5<!-- ****************************************************** -->
7<!-- Header --> 6<!-- Header -->
8<!-- ****************************************************** --> 7<!-- ****************************************************** -->
8<book id="Writing-an-ALSA-Driver">
9 <bookinfo> 9 <bookinfo>
10 <title>Writing an ALSA Driver</title> 10 <title>Writing an ALSA Driver</title>
11 <author> 11 <author>
@@ -492,9 +492,9 @@
492 } 492 }
493 493
494 /* (2) */ 494 /* (2) */
495 card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); 495 err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
496 if (card == NULL) 496 if (err < 0)
497 return -ENOMEM; 497 return err;
498 498
499 /* (3) */ 499 /* (3) */
500 err = snd_mychip_create(card, pci, &chip); 500 err = snd_mychip_create(card, pci, &chip);
@@ -590,8 +590,9 @@
590 <programlisting> 590 <programlisting>
591<![CDATA[ 591<![CDATA[
592 struct snd_card *card; 592 struct snd_card *card;
593 int err;
593 .... 594 ....
594 card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); 595 err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
595]]> 596]]>
596 </programlisting> 597 </programlisting>
597 </informalexample> 598 </informalexample>
@@ -809,26 +810,28 @@
809 810
810 <para> 811 <para>
811 As mentioned above, to create a card instance, call 812 As mentioned above, to create a card instance, call
812 <function>snd_card_new()</function>. 813 <function>snd_card_create()</function>.
813 814
814 <informalexample> 815 <informalexample>
815 <programlisting> 816 <programlisting>
816<![CDATA[ 817<![CDATA[
817 struct snd_card *card; 818 struct snd_card *card;
818 card = snd_card_new(index, id, module, extra_size); 819 int err;
820 err = snd_card_create(index, id, module, extra_size, &card);
819]]> 821]]>
820 </programlisting> 822 </programlisting>
821 </informalexample> 823 </informalexample>
822 </para> 824 </para>
823 825
824 <para> 826 <para>
825 The function takes four arguments, the card-index number, the 827 The function takes five arguments, the card-index number, the
826 id string, the module pointer (usually 828 id string, the module pointer (usually
827 <constant>THIS_MODULE</constant>), 829 <constant>THIS_MODULE</constant>),
828 and the size of extra-data space. The last argument is used to 830 the size of extra-data space, and the pointer to return the
831 card instance. The extra_size argument is used to
829 allocate card-&gt;private_data for the 832 allocate card-&gt;private_data for the
830 chip-specific data. Note that these data 833 chip-specific data. Note that these data
831 are allocated by <function>snd_card_new()</function>. 834 are allocated by <function>snd_card_create()</function>.
832 </para> 835 </para>
833 </section> 836 </section>
834 837
@@ -915,15 +918,16 @@
915 </para> 918 </para>
916 919
917 <section id="card-management-chip-specific-snd-card-new"> 920 <section id="card-management-chip-specific-snd-card-new">
918 <title>1. Allocating via <function>snd_card_new()</function>.</title> 921 <title>1. Allocating via <function>snd_card_create()</function>.</title>
919 <para> 922 <para>
920 As mentioned above, you can pass the extra-data-length 923 As mentioned above, you can pass the extra-data-length
921 to the 4th argument of <function>snd_card_new()</function>, i.e. 924 to the 4th argument of <function>snd_card_create()</function>, i.e.
922 925
923 <informalexample> 926 <informalexample>
924 <programlisting> 927 <programlisting>
925<![CDATA[ 928<![CDATA[
926 card = snd_card_new(index[dev], id[dev], THIS_MODULE, sizeof(struct mychip)); 929 err = snd_card_create(index[dev], id[dev], THIS_MODULE,
930 sizeof(struct mychip), &card);
927]]> 931]]>
928 </programlisting> 932 </programlisting>
929 </informalexample> 933 </informalexample>
@@ -952,8 +956,8 @@
952 956
953 <para> 957 <para>
954 After allocating a card instance via 958 After allocating a card instance via
955 <function>snd_card_new()</function> (with 959 <function>snd_card_create()</function> (with
956 <constant>NULL</constant> on the 4th arg), call 960 <constant>0</constant> on the 4th arg), call
957 <function>kzalloc()</function>. 961 <function>kzalloc()</function>.
958 962
959 <informalexample> 963 <informalexample>
@@ -961,7 +965,7 @@
961<![CDATA[ 965<![CDATA[
962 struct snd_card *card; 966 struct snd_card *card;
963 struct mychip *chip; 967 struct mychip *chip;
964 card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL); 968 err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
965 ..... 969 .....
966 chip = kzalloc(sizeof(*chip), GFP_KERNEL); 970 chip = kzalloc(sizeof(*chip), GFP_KERNEL);
967]]> 971]]>
@@ -1133,8 +1137,8 @@
1133 if (err < 0) 1137 if (err < 0)
1134 return err; 1138 return err;
1135 /* check PCI availability (28bit DMA) */ 1139 /* check PCI availability (28bit DMA) */
1136 if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 || 1140 if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
1137 pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) { 1141 pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
1138 printk(KERN_ERR "error to set 28bit mask DMA\n"); 1142 printk(KERN_ERR "error to set 28bit mask DMA\n");
1139 pci_disable_device(pci); 1143 pci_disable_device(pci);
1140 return -ENXIO; 1144 return -ENXIO;
@@ -1248,8 +1252,8 @@
1248 err = pci_enable_device(pci); 1252 err = pci_enable_device(pci);
1249 if (err < 0) 1253 if (err < 0)
1250 return err; 1254 return err;
1251 if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 || 1255 if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
1252 pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) { 1256 pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
1253 printk(KERN_ERR "error to set 28bit mask DMA\n"); 1257 printk(KERN_ERR "error to set 28bit mask DMA\n");
1254 pci_disable_device(pci); 1258 pci_disable_device(pci);
1255 return -ENXIO; 1259 return -ENXIO;
@@ -5750,8 +5754,9 @@ struct _snd_pcm_runtime {
5750 .... 5754 ....
5751 struct snd_card *card; 5755 struct snd_card *card;
5752 struct mychip *chip; 5756 struct mychip *chip;
5757 int err;
5753 .... 5758 ....
5754 card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL); 5759 err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
5755 .... 5760 ....
5756 chip = kzalloc(sizeof(*chip), GFP_KERNEL); 5761 chip = kzalloc(sizeof(*chip), GFP_KERNEL);
5757 .... 5762 ....
@@ -5763,7 +5768,7 @@ struct _snd_pcm_runtime {
5763 </informalexample> 5768 </informalexample>
5764 5769
5765 When you created the chip data with 5770 When you created the chip data with
5766 <function>snd_card_new()</function>, it's anyway accessible 5771 <function>snd_card_create()</function>, it's anyway accessible
5767 via <structfield>private_data</structfield> field. 5772 via <structfield>private_data</structfield> field.
5768 5773
5769 <informalexample> 5774 <informalexample>
@@ -5775,9 +5780,10 @@ struct _snd_pcm_runtime {
5775 .... 5780 ....
5776 struct snd_card *card; 5781 struct snd_card *card;
5777 struct mychip *chip; 5782 struct mychip *chip;
5783 int err;
5778 .... 5784 ....
5779 card = snd_card_new(index[dev], id[dev], THIS_MODULE, 5785 err = snd_card_create(index[dev], id[dev], THIS_MODULE,
5780 sizeof(struct mychip)); 5786 sizeof(struct mychip), &card);
5781 .... 5787 ....
5782 chip = card->private_data; 5788 chip = card->private_data;
5783 .... 5789 ....
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 256defd7e174..dcf7acc720e1 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -4,506 +4,356 @@
4 Revised Feb 12, 2004 by Martine Silbermann 4 Revised Feb 12, 2004 by Martine Silbermann
5 email: Martine.Silbermann@hp.com 5 email: Martine.Silbermann@hp.com
6 Revised Jun 25, 2004 by Tom L Nguyen 6 Revised Jun 25, 2004 by Tom L Nguyen
7 Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
8 Copyright 2003, 2008 Intel Corporation
7 9
81. About this guide 101. About this guide
9 11
10This guide describes the basics of Message Signaled Interrupts (MSI), 12This guide describes the basics of Message Signaled Interrupts (MSIs),
11the advantages of using MSI over traditional interrupt mechanisms, 13the advantages of using MSI over traditional interrupt mechanisms, how
12and how to enable your driver to use MSI or MSI-X. Also included is 14to change your driver to use MSI or MSI-X and some basic diagnostics to
13a Frequently Asked Questions (FAQ) section. 15try if a device doesn't support MSIs.
14
151.1 Terminology
16
17PCI devices can be single-function or multi-function. In either case,
18when this text talks about enabling or disabling MSI on a "device
19function," it is referring to one specific PCI device and function and
20not to all functions on a PCI device (unless the PCI device has only
21one function).
22
232. Copyright 2003 Intel Corporation
24
253. What is MSI/MSI-X?
26
27Message Signaled Interrupt (MSI), as described in the PCI Local Bus
28Specification Revision 2.3 or later, is an optional feature, and a
29required feature for PCI Express devices. MSI enables a device function
30to request service by sending an Inbound Memory Write on its PCI bus to
31the FSB as a Message Signal Interrupt transaction. Because MSI is
32generated in the form of a Memory Write, all transaction conditions,
33such as a Retry, Master-Abort, Target-Abort or normal completion, are
34supported.
35
36A PCI device that supports MSI must also support pin IRQ assertion
37interrupt mechanism to provide backward compatibility for systems that
38do not support MSI. In systems which support MSI, the bus driver is
39responsible for initializing the message address and message data of
40the device function's MSI/MSI-X capability structure during device
41initial configuration.
42
43An MSI capable device function indicates MSI support by implementing
44the MSI/MSI-X capability structure in its PCI capability list. The
45device function may implement both the MSI capability structure and
46the MSI-X capability structure; however, the bus driver should not
47enable both.
48
49The MSI capability structure contains Message Control register,
50Message Address register and Message Data register. These registers
51provide the bus driver control over MSI. The Message Control register
52indicates the MSI capability supported by the device. The Message
53Address register specifies the target address and the Message Data
54register specifies the characteristics of the message. To request
55service, the device function writes the content of the Message Data
56register to the target address. The device and its software driver
57are prohibited from writing to these registers.
58
59The MSI-X capability structure is an optional extension to MSI. It
60uses an independent and separate capability structure. There are
61some key advantages to implementing the MSI-X capability structure
62over the MSI capability structure as described below.
63
64 - Support a larger maximum number of vectors per function.
65
66 - Provide the ability for system software to configure
67 each vector with an independent message address and message
68 data, specified by a table that resides in Memory Space.
69
70 - MSI and MSI-X both support per-vector masking. Per-vector
71 masking is an optional extension of MSI but a required
72 feature for MSI-X. Per-vector masking provides the kernel the
73 ability to mask/unmask a single MSI while running its
74 interrupt service routine. If per-vector masking is
75 not supported, then the device driver should provide the
76 hardware/software synchronization to ensure that the device
77 generates MSI when the driver wants it to do so.
78
794. Why use MSI?
80
81As a benefit to the simplification of board design, MSI allows board
82designers to remove out-of-band interrupt routing. MSI is another
83step towards a legacy-free environment.
84
85Due to increasing pressure on chipset and processor packages to
86reduce pin count, the need for interrupt pins is expected to
87diminish over time. Devices, due to pin constraints, may implement
88messages to increase performance.
89
90PCI Express endpoints uses INTx emulation (in-band messages) instead
91of IRQ pin assertion. Using INTx emulation requires interrupt
92sharing among devices connected to the same node (PCI bridge) while
93MSI is unique (non-shared) and does not require BIOS configuration
94support. As a result, the PCI Express technology requires MSI
95support for better interrupt performance.
96
97Using MSI enables the device functions to support two or more
98vectors, which can be configured to target different CPUs to
99increase scalability.
100
1015. Configuring a driver to use MSI/MSI-X
102
103By default, the kernel will not enable MSI/MSI-X on all devices that
104support this capability. The CONFIG_PCI_MSI kernel option
105must be selected to enable MSI/MSI-X support.
106
1075.1 Including MSI/MSI-X support into the kernel
108
109To allow MSI/MSI-X capable device drivers to selectively enable
110MSI/MSI-X (using pci_enable_msi()/pci_enable_msix() as described
111below), the VECTOR based scheme needs to be enabled by setting
112CONFIG_PCI_MSI during kernel config.
113
114Since the target of the inbound message is the local APIC, providing
115CONFIG_X86_LOCAL_APIC must be enabled as well as CONFIG_PCI_MSI.
116
1175.2 Configuring for MSI support
118
119Due to the non-contiguous fashion in vector assignment of the
120existing Linux kernel, this version does not support multiple
121messages regardless of a device function is capable of supporting
122more than one vector. To enable MSI on a device function's MSI
123capability structure requires a device driver to call the function
124pci_enable_msi() explicitly.
125
1265.2.1 API pci_enable_msi
127 16
128int pci_enable_msi(struct pci_dev *dev)
129 17
130With this new API, a device driver that wants to have MSI 182. What are MSIs?
131enabled on its device function must call this API to enable MSI.
132A successful call will initialize the MSI capability structure
133with ONE vector, regardless of whether a device function is
134capable of supporting multiple messages. This vector replaces the
135pre-assigned dev->irq with a new MSI vector. To avoid a conflict
136of the new assigned vector with existing pre-assigned vector requires
137a device driver to call this API before calling request_irq().
138 19
1395.2.2 API pci_disable_msi 20A Message Signaled Interrupt is a write from the device to a special
21address which causes an interrupt to be received by the CPU.
140 22
141void pci_disable_msi(struct pci_dev *dev) 23The MSI capability was first specified in PCI 2.2 and was later enhanced
24in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
25capability was also introduced with PCI 3.0. It supports more interrupts
26per device than MSI and allows interrupts to be independently configured.
142 27
143This API should always be used to undo the effect of pci_enable_msi() 28Devices may support both MSI and MSI-X, but only one can be enabled at
144when a device driver is unloading. This API restores dev->irq with 29a time.
145the pre-assigned IOAPIC vector and switches a device's interrupt
146mode to PCI pin-irq assertion/INTx emulation mode.
147
148Note that a device driver should always call free_irq() on the MSI vector
149that it has done request_irq() on before calling this API. Failure to do
150so results in a BUG_ON() and a device will be left with MSI enabled and
151leaks its vector.
152
1535.2.3 MSI mode vs. legacy mode diagram
154
155The below diagram shows the events which switch the interrupt
156mode on the MSI-capable device function between MSI mode and
157PIN-IRQ assertion mode.
158
159 ------------ pci_enable_msi ------------------------
160 | | <=============== | |
161 | MSI MODE | | PIN-IRQ ASSERTION MODE |
162 | | ===============> | |
163 ------------ pci_disable_msi ------------------------
164
165
166Figure 1. MSI Mode vs. Legacy Mode
167
168In Figure 1, a device operates by default in legacy mode. Legacy
169in this context means PCI pin-irq assertion or PCI-Express INTx
170emulation. A successful MSI request (using pci_enable_msi()) switches
171a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
172stored in dev->irq will be saved by the PCI subsystem and a new
173assigned MSI vector will replace dev->irq.
174
175To return back to its default mode, a device driver should always call
176pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
177device driver should always call free_irq() on the MSI vector it has
178done request_irq() on before calling pci_disable_msi(). Failure to do
179so results in a BUG_ON() and a device will be left with MSI enabled and
180leaks its vector. Otherwise, the PCI subsystem restores a device's
181dev->irq with a pre-assigned IOAPIC vector and marks the released
182MSI vector as unused.
183
184Once being marked as unused, there is no guarantee that the PCI
185subsystem will reserve this MSI vector for a device. Depending on
186the availability of current PCI vector resources and the number of
187MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
188
189For the case where the PCI subsystem re-assigns this MSI vector to
190another driver, a request to switch back to MSI mode may result
191in being assigned a different MSI vector or a failure if no more
192vectors are available.
193
1945.3 Configuring for MSI-X support
195
196Due to the ability of the system software to configure each vector of
197the MSI-X capability structure with an independent message address
198and message data, the non-contiguous fashion in vector assignment of
199the existing Linux kernel has no impact on supporting multiple
200messages on an MSI-X capable device functions. To enable MSI-X on
201a device function's MSI-X capability structure requires its device
202driver to call the function pci_enable_msix() explicitly.
203
204The function pci_enable_msix(), once invoked, enables either
205all or nothing, depending on the current availability of PCI vector
206resources. If the PCI vector resources are available for the number
207of vectors requested by a device driver, this function will configure
208the MSI-X table of the MSI-X capability structure of a device with
209requested messages. To emphasize this reason, for example, a device
210may be capable for supporting the maximum of 32 vectors while its
211software driver usually may request 4 vectors. It is recommended
212that the device driver should call this function once during the
213initialization phase of the device driver.
214
215Unlike the function pci_enable_msi(), the function pci_enable_msix()
216does not replace the pre-assigned IOAPIC dev->irq with a new MSI
217vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
218into the field vector of each element contained in a second argument.
219Note that the pre-assigned IOAPIC dev->irq is valid only if the device
220operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
221using dev->irq by the device driver to request for interrupt service
222may result in unpredictable behavior.
223
224For each MSI-X vector granted, a device driver is responsible for calling
225other functions like request_irq(), enable_irq(), etc. to enable
226this vector with its corresponding interrupt service handler. It is
227a device driver's choice to assign all vectors with the same
228interrupt service handler or each vector with a unique interrupt
229service handler.
230
2315.3.1 Handling MMIO address space of MSI-X Table
232
233The PCI 3.0 specification has implementation notes that MMIO address
234space for a device's MSI-X structure should be isolated so that the
235software system can set different pages for controlling accesses to the
236MSI-X structure. The implementation of MSI support requires the PCI
237subsystem, not a device driver, to maintain full control of the MSI-X
238table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
239table/MSI-X PBA. A device driver should not access the MMIO address
240space of the MSI-X table/MSI-X PBA.
241
2425.3.2 API pci_enable_msix
243 30
244int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
245 31
246This API enables a device driver to request the PCI subsystem 323. Why use MSIs?
247to enable MSI-X messages on its hardware device. Depending on 33
248the availability of PCI vectors resources, the PCI subsystem enables 34There are three reasons why using MSIs can give an advantage over
249either all or none of the requested vectors. 35traditional pin-based interrupts.
36
37Pin-based PCI interrupts are often shared amongst several devices.
38To support this, the kernel must call each interrupt handler associated
39with an interrupt, which leads to reduced performance for the system as
40a whole. MSIs are never shared, so this problem cannot arise.
41
42When a device writes data to memory, then raises a pin-based interrupt,
43it is possible that the interrupt may arrive before all the data has
44arrived in memory (this becomes more likely with devices behind PCI-PCI
45bridges). In order to ensure that all the data has arrived in memory,
46the interrupt handler must read a register on the device which raised
47the interrupt. PCI transaction ordering rules require that all the data
48arrives in memory before the value can be returned from the register.
49Using MSIs avoids this problem as the interrupt-generating write cannot
50pass the data writes, so by the time the interrupt is raised, the driver
51knows that all the data has arrived in memory.
52
53PCI devices can only support a single pin-based interrupt per function.
54Often drivers have to query the device to find out what event has
55occurred, slowing down interrupt handling for the common case. With
56MSIs, a device can support more interrupts, allowing each interrupt
57to be specialised to a different purpose. One possible design gives
58infrequent conditions (such as errors) their own interrupt which allows
59the driver to handle the normal interrupt handling path more efficiently.
60Other possible designs include giving one interrupt to each packet queue
61in a network card or each port in a storage controller.
62
63
644. How to use MSIs
65
66PCI devices are initialised to use pin-based interrupts. The device
67driver has to set up the device to use MSI or MSI-X. Not all machines
68support MSIs correctly, and for those machines, the APIs described below
69will simply fail and the device will continue to use pin-based interrupts.
70
714.1 Include kernel support for MSIs
72
73To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
74option enabled. This option is only available on some architectures,
75and it may depend on some other options also being set. For example,
76on x86, you must also enable X86_UP_APIC or SMP in order to see the
77CONFIG_PCI_MSI option.
78
794.2 Using MSI
80
81Most of the hard work is done for the driver in the PCI layer. It simply
82has to request that the PCI layer set up the MSI capability for this
83device.
84
854.2.1 pci_enable_msi
86
87int pci_enable_msi(struct pci_dev *dev)
88
89A successful call will allocate ONE interrupt to the device, regardless
90of how many MSIs the device supports. The device will be switched from
91pin-based interrupt mode to MSI mode. The dev->irq number is changed
92to a new number which represents the message signaled interrupt.
93This function should be called before the driver calls request_irq()
94since enabling MSIs disables the pin-based IRQ and the driver will not
95receive interrupts on the old interrupt.
96
974.2.2 pci_enable_msi_block
98
99int pci_enable_msi_block(struct pci_dev *dev, int count)
100
101This variation on the above call allows a device driver to request multiple
102MSIs. The MSI specification only allows interrupts to be allocated in
103powers of two, up to a maximum of 2^5 (32).
104
105If this function returns 0, it has succeeded in allocating at least as many
106interrupts as the driver requested (it may have allocated more in order
107to satisfy the power-of-two requirement). In this case, the function
108enables MSI on this device and updates dev->irq to be the lowest of
109the new interrupts assigned to it. The other interrupts assigned to
110the device are in the range dev->irq to dev->irq + count - 1.
111
112If this function returns a negative number, it indicates an error and
113the driver should not attempt to request any more MSI interrupts for
114this device. If this function returns a positive number, it will be
115less than 'count' and indicate the number of interrupts that could have
116been allocated. In neither case will the irq value have been
117updated, nor will the device have been switched into MSI mode.
118
119The device driver must decide what action to take if
120pci_enable_msi_block() returns a value less than the number asked for.
121Some devices can make use of fewer interrupts than the maximum they
122request; in this case the driver should call pci_enable_msi_block()
123again. Note that it is not guaranteed to succeed, even when the
124'count' has been reduced to the value returned from a previous call to
125pci_enable_msi_block(). This is because there are multiple constraints
126on the number of vectors that can be allocated; pci_enable_msi_block()
127will return as soon as it finds any constraint that doesn't allow the
128call to succeed.
129
1304.2.3 pci_disable_msi
131
132void pci_disable_msi(struct pci_dev *dev)
250 133
251Argument 'dev' points to the device (pci_dev) structure. 134This function should be used to undo the effect of pci_enable_msi() or
135pci_enable_msi_block(). Calling it restores dev->irq to the pin-based
136interrupt number and frees the previously allocated message signaled
137interrupt(s). The interrupt may subsequently be assigned to another
138device, so drivers should not cache the value of dev->irq.
252 139
253Argument 'entries' is a pointer to an array of msix_entry structs. 140A device driver must always call free_irq() on the interrupt(s)
254The number of entries is indicated in argument 'nvec'. 141for which it has called request_irq() before calling this function.
255struct msix_entry is defined in /driver/pci/msi.h: 142Failure to do so will result in a BUG_ON(), the device will be left with
143MSI enabled and will leak its vector.
144
1454.3 Using MSI-X
146
147The MSI-X capability is much more flexible than the MSI capability.
148It supports up to 2048 interrupts, each of which can be controlled
149independently. To support this flexibility, drivers must use an array of
150`struct msix_entry':
256 151
257struct msix_entry { 152struct msix_entry {
258 u16 vector; /* kernel uses to write alloc vector */ 153 u16 vector; /* kernel uses to write alloc vector */
259 u16 entry; /* driver uses to specify entry */ 154 u16 entry; /* driver uses to specify entry */
260}; 155};
261 156
262A device driver is responsible for initializing the field 'entry' of 157This allows for the device to use these interrupts in a sparse fashion;
263each element with a unique entry supported by MSI-X table. Otherwise, 158for example it could use interrupts 3 and 1027 and allocate only a
264-EINVAL will be returned as a result. A successful return of zero 159two-element array. The driver is expected to fill in the 'entry' value
265indicates the PCI subsystem completed initializing each of the requested 160in each element of the array to indicate which entries it wants the kernel
266entries of the MSI-X table with message address and message data. 161to assign interrupts for. It is invalid to fill in two entries with the
267Last but not least, the PCI subsystem will write the 1:1 162same number.
268vector-to-entry mapping into the field 'vector' of each element. A 163
269device driver is responsible for keeping track of allocated MSI-X 1644.3.1 pci_enable_msix
270vectors in its internal data structure. 165
271 166int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
272A return of zero indicates that the number of MSI-X vectors was 167
273successfully allocated. A return of greater than zero indicates 168Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
274MSI-X vector shortage. Or a return of less than zero indicates 169The 'entries' argument is a pointer to an array of msix_entry structs
275a failure. This failure may be a result of duplicate entries 170which should be at least 'nvec' entries in size. On success, the
276specified in second argument, or a result of no available vector, 171function will return 0 and the device will have been switched into
277or a result of failing to initialize MSI-X table entries. 172MSI-X interrupt mode. The 'vector' elements in each entry will have
278 173been filled in with the interrupt number. The driver should then call
2795.3.3 API pci_disable_msix 174request_irq() for each 'vector' that it decides to use.
175
176If this function returns a negative number, it indicates an error and
177the driver should not attempt to allocate any more MSI-X interrupts for
178this device. If it returns a positive number, it indicates the maximum
179number of interrupt vectors that could have been allocated. See example
180below.
181
182This function, in contrast with pci_enable_msi(), does not adjust
183dev->irq. The device will not generate interrupts for this interrupt
184number once MSI-X is enabled. The device driver is responsible for
185keeping track of the interrupts assigned to the MSI-X vectors so it can
186free them again later.
187
188Device drivers should normally call this function once per device
189during the initialization phase.
190
191It is ideal if drivers can cope with a variable number of MSI-X interrupts,
192there are many reasons why the platform may not be able to provide the
193exact number a driver asks for.
194
195A request loop to achieve that might look like:
196
197static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
198{
199 while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
200 rc = pci_enable_msix(adapter->pdev,
201 adapter->msix_entries, nvec);
202 if (rc > 0)
203 nvec = rc;
204 else
205 return rc;
206 }
207
208 return -ENOSPC;
209}
210
2114.3.2 pci_disable_msix
280 212
281void pci_disable_msix(struct pci_dev *dev) 213void pci_disable_msix(struct pci_dev *dev)
282 214
283This API should always be used to undo the effect of pci_enable_msix() 215This API should be used to undo the effect of pci_enable_msix(). It frees
284when a device driver is unloading. Note that a device driver should 216the previously allocated message signaled interrupts. The interrupts may
285always call free_irq() on all MSI-X vectors it has done request_irq() 217subsequently be assigned to another device, so drivers should not cache
286on before calling this API. Failure to do so results in a BUG_ON() and 218the value of the 'vector' elements over a call to pci_disable_msix().
287a device will be left with MSI-X enabled and leaks its vectors. 219
288 220A device driver must always call free_irq() on the interrupt(s)
2895.3.4 MSI-X mode vs. legacy mode diagram 221for which it has called request_irq() before calling this function.
290 222Failure to do so will result in a BUG_ON(), the device will be left with
291The below diagram shows the events which switch the interrupt 223MSI enabled and will leak its vector.
292mode on the MSI-X capable device function between MSI-X mode and 224
293PIN-IRQ assertion mode (legacy). 2254.3.3 The MSI-X Table
294 226
295 ------------ pci_enable_msix(,,n) ------------------------ 227The MSI-X capability specifies a BAR and offset within that BAR for the
296 | | <=============== | | 228MSI-X Table. This address is mapped by the PCI subsystem, and should not
297 | MSI-X MODE | | PIN-IRQ ASSERTION MODE | 229be accessed directly by the device driver. If the driver wishes to
298 | | ===============> | | 230mask or unmask an interrupt, it should call disable_irq() / enable_irq().
299 ------------ pci_disable_msix ------------------------ 231
300 2324.4 Handling devices implementing both MSI and MSI-X capabilities
301Figure 2. MSI-X Mode vs. Legacy Mode 233
302 234If a device implements both MSI and MSI-X capabilities, it can
303In Figure 2, a device operates by default in legacy mode. A 235run in either MSI mode or MSI-X mode but not both simultaneously.
304successful MSI-X request (using pci_enable_msix()) switches a 236This is a requirement of the PCI spec, and it is enforced by the
305device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector 237PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or
306stored in dev->irq will be saved by the PCI subsystem; however, 238pci_enable_msix() when MSI is already enabled will result in an error.
307unlike MSI mode, the PCI subsystem will not replace dev->irq with 239If a device driver wishes to switch between MSI and MSI-X at runtime,
308assigned MSI-X vector because the PCI subsystem already writes the 1:1 240it must first quiesce the device, then switch it back to pin-interrupt
309vector-to-entry mapping into the field 'vector' of each element 241mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
310specified in second argument. 242operation. This is not expected to be a common operation but may be
311 243useful for debugging or testing during development.
312To return back to its default mode, a device driver should always call 244
313pci_disable_msix() to undo the effect of pci_enable_msix(). Note that 2454.5 Considerations when using MSIs
314a device driver should always call free_irq() on all MSI-X vectors it 246
315has done request_irq() on before calling pci_disable_msix(). Failure 2474.5.1 Choosing between MSI-X and MSI
316to do so results in a BUG_ON() and a device will be left with MSI-X 248
317enabled and leaks its vectors. Otherwise, the PCI subsystem switches a 249If your device supports both MSI-X and MSI capabilities, you should use
318device function's interrupt mode from MSI-X mode to legacy mode and 250the MSI-X facilities in preference to the MSI facilities. As mentioned
319marks all allocated MSI-X vectors as unused. 251above, MSI-X supports any number of interrupts between 1 and 2048.
320 252In constrast, MSI is restricted to a maximum of 32 interrupts (and
321Once being marked as unused, there is no guarantee that the PCI 253must be a power of two). In addition, the MSI interrupt vectors must
322subsystem will reserve these MSI-X vectors for a device. Depending on 254be allocated consecutively, so the system may not be able to allocate
323the availability of current PCI vector resources and the number of 255as many vectors for MSI as it could for MSI-X. On some platforms, MSI
324MSI/MSI-X requests from other drivers, these MSI-X vectors may be 256interrupts must all be targetted at the same set of CPUs whereas MSI-X
325re-assigned. 257interrupts can all be targetted at different CPUs.
326 258
327For the case where the PCI subsystem re-assigned these MSI-X vectors 2594.5.2 Spinlocks
328to other drivers, a request to switch back to MSI-X mode may result 260
329being assigned with another set of MSI-X vectors or a failure if no 261Most device drivers have a per-device spinlock which is taken in the
330more vectors are available. 262interrupt handler. With pin-based interrupts or a single MSI, it is not
331 263necessary to disable interrupts (Linux guarantees the same interrupt will
3325.4 Handling function implementing both MSI and MSI-X capabilities 264not be re-entered). If a device uses multiple interrupts, the driver
333 265must disable interrupts while the lock is held. If the device sends
334For the case where a function implements both MSI and MSI-X 266a different interrupt, the driver will deadlock trying to recursively
335capabilities, the PCI subsystem enables a device to run either in MSI 267acquire the spinlock.
336mode or MSI-X mode but not both. A device driver determines whether it 268
337wants MSI or MSI-X enabled on its hardware device. Once a device 269There are two solutions. The first is to take the lock with
338driver requests for MSI, for example, it is prohibited from requesting 270spin_lock_irqsave() or spin_lock_irq() (see
339MSI-X; in other words, a device driver is not permitted to ping-pong 271Documentation/DocBook/kernel-locking). The second is to specify
340between MSI mod MSI-X mode during a run-time. 272IRQF_DISABLED to request_irq() so that the kernel runs the entire
341 273interrupt routine with interrupts disabled.
3425.5 Hardware requirements for MSI/MSI-X support 274
343 275If your MSI interrupt routine does not hold the lock for the whole time
344MSI/MSI-X support requires support from both system hardware and 276it is running, the first solution may be best. The second solution is
345individual hardware device functions. 277normally preferred as it avoids making two transitions from interrupt
346 278disabled to enabled and back again.
3475.5.1 Required x86 hardware support 279
348 2804.6 How to tell whether MSI/MSI-X is enabled on a device
349Since the target of MSI address is the local APIC CPU, enabling 281
350MSI/MSI-X support in the Linux kernel is dependent on whether existing 282Using 'lspci -v' (as root) may show some devices with "MSI", "Message
351system hardware supports local APIC. Users should verify that their 283Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
352system supports local APIC operation by testing that it runs when 284has an 'Enable' flag which will be followed with either "+" (enabled)
353CONFIG_X86_LOCAL_APIC=y. 285or "-" (disabled).
354 286
355In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set; 287
356however, in UP environment, users must manually set 2885. MSI quirks
357CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting 289
358CONFIG_PCI_MSI enables the VECTOR based scheme and the option for 290Several PCI chipsets or devices are known not to support MSIs.
359MSI-capable device drivers to selectively enable MSI/MSI-X. 291The PCI stack provides three ways to disable MSIs:
360 292
361Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X 2931. globally
362vector is allocated new during runtime and MSI/MSI-X support does not 2942. on all devices behind a specific bridge
363depend on BIOS support. This key independency enables MSI/MSI-X 2953. on a single device
364support on future IOxAPIC free platforms. 296
365 2975.1. Disabling MSIs globally
3665.5.2 Device hardware support 298
367 299Some host chipsets simply don't support MSIs properly. If we're
368The hardware device function supports MSI by indicating the 300lucky, the manufacturer knows this and has indicated it in the ACPI
369MSI/MSI-X capability structure on its PCI capability list. By 301FADT table. In this case, Linux will automatically disable MSIs.
370default, this capability structure will not be initialized by 302Some boards don't include this information in the table and so we have
371the kernel to enable MSI during the system boot. In other words, 303to detect them ourselves. The complete list of these is found near the
372the device function is running on its default pin assertion mode. 304quirk_disable_all_msi() function in drivers/pci/quirks.c.
373Note that in many cases the hardware supporting MSI have bugs, 305
374which may result in system hangs. The software driver of specific 306If you have a board which has problems with MSIs, you can pass pci=nomsi
375MSI-capable hardware is responsible for deciding whether to call 307on the kernel command line to disable MSIs on all devices. It would be
376pci_enable_msi or not. A return of zero indicates the kernel 308in your best interests to report the problem to linux-pci@vger.kernel.org
377successfully initialized the MSI/MSI-X capability structure of the 309including a full 'lspci -v' so we can add the quirks to the kernel.
378device function. The device function is now running on MSI/MSI-X mode. 310
379 3115.2. Disabling MSIs below a bridge
3805.6 How to tell whether MSI/MSI-X is enabled on device function 312
381 313Some PCI bridges are not able to route MSIs between busses properly.
382At the driver level, a return of zero from the function call of 314In this case, MSIs must be disabled on all devices behind the bridge.
383pci_enable_msi()/pci_enable_msix() indicates to a device driver that 315
384its device function is initialized successfully and ready to run in 316Some bridges allow you to enable MSIs by changing some bits in their
385MSI/MSI-X mode. 317PCI configuration space (especially the Hypertransport chipsets such
386 318as the nVidia nForce and Serverworks HT2000). As with host chipsets,
387At the user level, users can use the command 'cat /proc/interrupts' 319Linux mostly knows about them and automatically enables MSIs if it can.
388to display the vectors allocated for devices and their interrupt 320If you have a bridge which Linux doesn't yet know about, you can enable
389MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is 321MSIs in configuration space using whatever method you know works, then
390enabled on a SCSI Adaptec 39320D Ultra320 controller. 322enable MSIs on that bridge by doing:
391 323
392 CPU0 CPU1 324 echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
393 0: 324639 0 IO-APIC-edge timer 325
394 1: 1186 0 IO-APIC-edge i8042 326where $bridge is the PCI address of the bridge you've enabled (eg
395 2: 0 0 XT-PIC cascade 3270000:00:0e.0).
396 12: 2797 0 IO-APIC-edge i8042 328
397 14: 6543 0 IO-APIC-edge ide0 329To disable MSIs, echo 0 instead of 1. Changing this value should be
398 15: 1 0 IO-APIC-edge ide1 330done with caution as it can break interrupt handling for all devices
399169: 0 0 IO-APIC-level uhci-hcd 331below this bridge.
400185: 0 0 IO-APIC-level uhci-hcd 332
401193: 138 10 PCI-MSI aic79xx 333Again, please notify linux-pci@vger.kernel.org of any bridges that need
402201: 30 0 PCI-MSI aic79xx 334special handling.
403225: 30 0 IO-APIC-level aic7xxx 335
404233: 30 0 IO-APIC-level aic7xxx 3365.3. Disabling MSIs on a single device
405NMI: 0 0 337
406LOC: 324553 325068 338Some devices are known to have faulty MSI implementations. Usually this
407ERR: 0 339is handled in the individual device driver but occasionally it's necessary
408MIS: 0 340to handle this with a quirk. Some drivers have an option to disable use
409 341of MSI. While this is a convenient workaround for the driver author,
4106. MSI quirks 342it is not good practise, and should not be emulated.
411 343
412Several PCI chipsets or devices are known to not support MSI. 3445.4. Finding why MSIs are disabled on a device
413The PCI stack provides 3 possible levels of MSI disabling: 345
414* on a single device 346From the above three sections, you can see that there are many reasons
415* on all devices behind a specific bridge 347why MSIs may not be enabled for a given device. Your first step should
416* globally 348be to examine your dmesg carefully to determine whether MSIs are enabled
417 349for your machine. You should also check your .config to be sure you
4186.1. Disabling MSI on a single device 350have enabled CONFIG_PCI_MSI.
419 351
420Under some circumstances it might be required to disable MSI on a 352Then, 'lspci -t' gives the list of bridges above a device. Reading
421single device. This may be achieved by either not calling pci_enable_msi() 353/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1)
422or all, or setting the pci_dev->no_msi flag before (most of the time 354or disabled (0). If 0 is found in any of the msi_bus files belonging
423in a quirk). 355to bridges between the PCI root and the device, MSIs are disabled.
424 356
4256.2. Disabling MSI below a bridge 357It is also worth checking the device driver to see whether it supports MSIs.
426 358For example, it may contain calls to pci_enable_msi(), pci_enable_msix() or
427The vast majority of MSI quirks are required by PCI bridges not 359pci_enable_msi_block().
428being able to route MSI between busses. In this case, MSI have to be
429disabled on all devices behind this bridge. It is achieves by setting
430the PCI_BUS_FLAGS_NO_MSI flag in the pci_bus->bus_flags of the bridge
431subordinate bus. There is no need to set the same flag on bridges that
432are below the broken bridge. When pci_enable_msi() is called to enable
433MSI on a device, pci_msi_supported() takes care of checking the NO_MSI
434flag in all parent busses of the device.
435
436Some bridges actually support dynamic MSI support enabling/disabling
437by changing some bits in their PCI configuration space (especially
438the Hypertransport chipsets such as the nVidia nForce and Serverworks
439HT2000). It may then be required to update the NO_MSI flag on the
440corresponding devices in the sysfs hierarchy. To enable MSI support
441on device "0000:00:0e", do:
442
443 echo 1 > /sys/bus/pci/devices/0000:00:0e/msi_bus
444
445To disable MSI support, echo 0 instead of 1. Note that it should be
446used with caution since changing this value might break interrupts.
447
4486.3. Disabling MSI globally
449
450Some extreme cases may require to disable MSI globally on the system.
451For now, the only known case is a Serverworks PCI-X chipsets (MSI are
452not supported on several busses that are not all connected to the
453chipset in the Linux PCI hierarchy). In the vast majority of other
454cases, disabling only behind a specific bridge is enough.
455
456For debugging purpose, the user may also pass pci=nomsi on the kernel
457command-line to explicitly disable MSI globally. But, once the appro-
458priate quirks are added to the kernel, this option should not be
459required anymore.
460
4616.4. Finding why MSI cannot be enabled on a device
462
463Assuming that MSI are not enabled on a device, you should look at
464dmesg to find messages that quirks may output when disabling MSI
465on some devices, some bridges or even globally.
466Then, lspci -t gives the list of bridges above a device. Reading
467/sys/bus/pci/devices/0000:00:0e/msi_bus will tell you whether MSI
468are enabled (1) or disabled (0). In 0 is found in a single bridge
469msi_bus file above the device, MSI cannot be enabled.
470
4717. FAQ
472
473Q1. Are there any limitations on using the MSI?
474
475A1. If the PCI device supports MSI and conforms to the
476specification and the platform supports the APIC local bus,
477then using MSI should work.
478
479Q2. Will it work on all the Pentium processors (P3, P4, Xeon,
480AMD processors)? In P3 IPI's are transmitted on the APIC local
481bus and in P4 and Xeon they are transmitted on the system
482bus. Are there any implications with this?
483
484A2. MSI support enables a PCI device sending an inbound
485memory write (0xfeexxxxx as target address) on its PCI bus
486directly to the FSB. Since the message address has a
487redirection hint bit cleared, it should work.
488
489Q3. The target address 0xfeexxxxx will be translated by the
490Host Bridge into an interrupt message. Are there any
491limitations on the chipsets such as Intel 8xx, Intel e7xxx,
492or VIA?
493
494A3. If these chipsets support an inbound memory write with
495target address set as 0xfeexxxxx, as conformed to PCI
496specification 2.3 or latest, then it should work.
497
498Q4. From the driver point of view, if the MSI is lost because
499of errors occurring during inbound memory write, then it may
500wait forever. Is there a mechanism for it to recover?
501
502A4. Since the target of the transaction is an inbound memory
503write, all transaction termination conditions (Retry,
504Master-Abort, Target-Abort, or normal completion) are
505supported. A device sending an MSI must abide by all the PCI
506rules and conditions regarding that inbound memory write. So,
507if a retry is signaled it must retry, etc... We believe that
508the recommendation for Abort is also a retry (refer to PCI
509specification 2.3 or latest).
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
new file mode 100644
index 000000000000..fc73ef5d65b8
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.txt
@@ -0,0 +1,99 @@
1 PCI Express I/O Virtualization Howto
2 Copyright (C) 2009 Intel Corporation
3 Yu Zhao <yu.zhao@intel.com>
4
5
61. Overview
7
81.1 What is SR-IOV
9
10Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
11capability which makes one physical device appear as multiple virtual
12devices. The physical device is referred to as Physical Function (PF)
13while the virtual devices are referred to as Virtual Functions (VF).
14Allocation of the VF can be dynamically controlled by the PF via
15registers encapsulated in the capability. By default, this feature is
16not enabled and the PF behaves as traditional PCIe device. Once it's
17turned on, each VF's PCI configuration space can be accessed by its own
18Bus, Device and Function Number (Routing ID). And each VF also has PCI
19Memory Space, which is used to map its register set. VF device driver
20operates on the register set so it can be functional and appear as a
21real existing PCI device.
22
232. User Guide
24
252.1 How can I enable SR-IOV capability
26
27The device driver (PF driver) will control the enabling and disabling
28of the capability via API provided by SR-IOV core. If the hardware
29has SR-IOV capability, loading its PF driver would enable it and all
30VFs associated with the PF.
31
322.2 How can I use the Virtual Functions
33
34The VF is treated as hot-plugged PCI devices in the kernel, so they
35should be able to work in the same way as real PCI devices. The VF
36requires device driver that is same as a normal PCI device's.
37
383. Developer Guide
39
403.1 SR-IOV API
41
42To enable SR-IOV capability:
43 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
44 'nr_virtfn' is number of VFs to be enabled.
45
46To disable SR-IOV capability:
47 void pci_disable_sriov(struct pci_dev *dev);
48
49To notify SR-IOV core of Virtual Function Migration:
50 irqreturn_t pci_sriov_migration(struct pci_dev *dev);
51
523.2 Usage example
53
54Following piece of code illustrates the usage of the SR-IOV API.
55
56static int __devinit dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
57{
58 pci_enable_sriov(dev, NR_VIRTFN);
59
60 ...
61
62 return 0;
63}
64
65static void __devexit dev_remove(struct pci_dev *dev)
66{
67 pci_disable_sriov(dev);
68
69 ...
70}
71
72static int dev_suspend(struct pci_dev *dev, pm_message_t state)
73{
74 ...
75
76 return 0;
77}
78
79static int dev_resume(struct pci_dev *dev)
80{
81 ...
82
83 return 0;
84}
85
86static void dev_shutdown(struct pci_dev *dev)
87{
88 ...
89}
90
91static struct pci_driver dev_driver = {
92 .name = "SR-IOV Physical Function driver",
93 .id_table = dev_id_table,
94 .probe = dev_probe,
95 .remove = __devexit_p(dev_remove),
96 .suspend = dev_suspend,
97 .resume = dev_resume,
98 .shutdown = dev_shutdown,
99};
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index ddeb14beacc8..be21001ab144 100644
--- a/Documentation/PCI/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -61,6 +61,10 @@ be initiated although firmwares have no _OSC support. To enable the
61walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line 61walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
62when booting kernel. Note that forceload=n by default. 62when booting kernel. Note that forceload=n by default.
63 63
64nosourceid, another parameter of type bool, can be used when broken
65hardware (mostly chipsets) has root ports that cannot obtain the reporting
66source ID. nosourceid=n by default.
67
642.3 AER error output 682.3 AER error output
65When a PCI-E AER error is captured, an error message will be outputed to 69When a PCI-E AER error is captured, an error message will be outputed to
66console. If it's a correctable error, it is outputed as a warning. 70console. If it's a correctable error, it is outputed as a warning.
@@ -246,3 +250,24 @@ with the PCI Express AER Root driver?
246A: It could call the helper functions to enable AER in devices and 250A: It could call the helper functions to enable AER in devices and
247cleanup uncorrectable status register. Pls. refer to section 3.3. 251cleanup uncorrectable status register. Pls. refer to section 3.3.
248 252
253
2544. Software error injection
255
256Debugging PCIE AER error recovery code is quite difficult because it
257is hard to trigger real hardware errors. Software based error
258injection can be used to fake various kinds of PCIE errors.
259
260First you should enable PCIE AER software error injection in kernel
261configuration, that is, following item should be in your .config.
262
263CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
264
265After reboot with new kernel or insert the module, a device file named
266/dev/aer_inject should be created.
267
268Then, you need a user space tool named aer-inject, which can be gotten
269from:
270 http://www.kernel.org/pub/linux/utils/pci/aer-inject/
271
272More information about aer-inject can be found in the document comes
273with its source code.
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
index 1fd175368a87..4349c1487e91 100644
--- a/Documentation/RCU/listRCU.txt
+++ b/Documentation/RCU/listRCU.txt
@@ -118,7 +118,7 @@ Following are the RCU equivalents for these two functions:
118 list_for_each_entry(e, list, list) { 118 list_for_each_entry(e, list, list) {
119 if (!audit_compare_rule(rule, &e->rule)) { 119 if (!audit_compare_rule(rule, &e->rule)) {
120 list_del_rcu(&e->list); 120 list_del_rcu(&e->list);
121 call_rcu(&e->rcu, audit_free_rule, e); 121 call_rcu(&e->rcu, audit_free_rule);
122 return 0; 122 return 0;
123 } 123 }
124 } 124 }
@@ -206,7 +206,7 @@ RCU ("read-copy update") its name. The RCU code is as follows:
206 ne->rule.action = newaction; 206 ne->rule.action = newaction;
207 ne->rule.file_count = newfield_count; 207 ne->rule.file_count = newfield_count;
208 list_replace_rcu(e, ne); 208 list_replace_rcu(e, ne);
209 call_rcu(&e->rcu, audit_free_rule, e); 209 call_rcu(&e->rcu, audit_free_rule);
210 return 0; 210 return 0;
211 } 211 }
212 } 212 }
@@ -283,7 +283,7 @@ flag under the spinlock as follows:
283 list_del_rcu(&e->list); 283 list_del_rcu(&e->list);
284 e->deleted = 1; 284 e->deleted = 1;
285 spin_unlock(&e->lock); 285 spin_unlock(&e->lock);
286 call_rcu(&e->rcu, audit_free_rule, e); 286 call_rcu(&e->rcu, audit_free_rule);
287 return 0; 287 return 0;
288 } 288 }
289 } 289 }
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 95821a29ae41..7aa2002ade77 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -81,7 +81,7 @@ o I hear that RCU needs work in order to support realtime kernels?
81 This work is largely completed. Realtime-friendly RCU can be 81 This work is largely completed. Realtime-friendly RCU can be
82 enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. 82 enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter.
83 However, work is in progress for enabling priority boosting of 83 However, work is in progress for enabling priority boosting of
84 preempted RCU read-side critical sections.This is needed if you 84 preempted RCU read-side critical sections. This is needed if you
85 have CPU-bound realtime threads. 85 have CPU-bound realtime threads.
86 86
87o Where can I find more information on RCU? 87o Where can I find more information on RCU?
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 239f542d48ba..93cb28d05dcd 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -21,7 +21,7 @@ if (obj) {
21 /* 21 /*
22 * Because a writer could delete object, and a writer could 22 * Because a writer could delete object, and a writer could
23 * reuse these object before the RCU grace period, we 23 * reuse these object before the RCU grace period, we
24 * must check key after geting the reference on object 24 * must check key after getting the reference on object
25 */ 25 */
26 if (obj->key != key) { // not the object we expected 26 if (obj->key != key) { // not the object we expected
27 put_ref(obj); 27 put_ref(obj);
@@ -117,8 +117,8 @@ a race (some writer did a delete and/or a move of an object
117to another chain) checking the final 'nulls' value if 117to another chain) checking the final 'nulls' value if
118the lookup met the end of chain. If final 'nulls' value 118the lookup met the end of chain. If final 'nulls' value
119is not the slot number, then we must restart the lookup at 119is not the slot number, then we must restart the lookup at
120the begining. If the object was moved to same chain, 120the beginning. If the object was moved to the same chain,
121then the reader doesnt care : It might eventually 121then the reader doesn't care : It might eventually
122scan the list again without harm. 122scan the list again without harm.
123 123
124 124
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 068848240a8b..02cced183b2d 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
192The output of "cat rcu/rcudata" looks as follows: 192The output of "cat rcu/rcudata" looks as follows:
193 193
194rcu: 194rcu:
195 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 195rcu:
196 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 196 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
197 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 197 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
198 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 198 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
199 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 199 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
200 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 200 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
201 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 201 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
202 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 202 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
203 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
203rcu_bh: 204rcu_bh:
204 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 205 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
205 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 206 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
206 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 207 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
207 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 208 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
208 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 209 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
209 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 210 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
210 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 211 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
211 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 212 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
212 213
213The first section lists the rcu_data structures for rcu, the second for 214The first section lists the rcu_data structures for rcu, the second for
214rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. 215rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent
253o "qp" indicates that RCU still expects a quiescent state from 254o "qp" indicates that RCU still expects a quiescent state from
254 this CPU. 255 this CPU.
255 256
256o "rpfq" is the number of rcu_pending() calls on this CPU required
257 to induce this CPU to invoke force_quiescent_state().
258
259o "rp" is low-order four hex digits of the count of how many times
260 rcu_pending() has been invoked on this CPU.
261
262o "dt" is the current value of the dyntick counter that is incremented 257o "dt" is the current value of the dyntick counter that is incremented
263 when entering or leaving dynticks idle state, either by the 258 when entering or leaving dynticks idle state, either by the
264 scheduler or by irq. The number after the "/" is the interrupt 259 scheduler or by irq. The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number
305 of RCU callbacks is ready to invoke, then the remainder will 300 of RCU callbacks is ready to invoke, then the remainder will
306 be deferred. 301 be deferred.
307 302
303There is also an rcu/rcudata.csv file with the same information in
304comma-separated-variable spreadsheet format.
305
308 306
309The output of "cat rcu/rcugp" looks as follows: 307The output of "cat rcu/rcugp" looks as follows:
310 308
@@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
411 For example, the first entry at the lowest level shows 409 For example, the first entry at the lowest level shows
412 "^0", indicating that it corresponds to bit zero in 410 "^0", indicating that it corresponds to bit zero in
413 the first entry at the middle level. 411 the first entry at the middle level.
412
413
414The output of "cat rcu/rcu_pending" looks as follows:
415
416rcu:
417 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
418 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
419 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
420 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
421 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
422 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
423 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
424 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
425rcu_bh:
426 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
427 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
428 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
429 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
430 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
431 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
432 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
433 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
434
435As always, this is once again split into "rcu" and "rcu_bh" portions.
436The fields are as follows:
437
438o "np" is the number of times that __rcu_pending() has been invoked
439 for the corresponding flavor of RCU.
440
441o "qsp" is the number of times that the RCU was waiting for a
442 quiescent state from this CPU.
443
444o "cbr" is the number of times that this CPU had RCU callbacks
445 that had passed through a grace period, and were thus ready
446 to be invoked.
447
448o "cng" is the number of times that this CPU needed another
449 grace period while RCU was idle.
450
451o "gpc" is the number of times that an old grace period had
452 completed, but this CPU was not yet aware of it.
453
454o "gps" is the number of times that a new grace period had started,
455 but this CPU was not yet aware of it.
456
457o "nf" is the number of times that this CPU suspected that the
458 current grace period had run for too long, and thus needed to
459 be forced.
460
461 Please note that "forcing" consists of sending resched IPIs
462 to holdout CPUs. If that CPU really still is in an old RCU
463 read-side critical section, then we really do have to wait for it.
464 The assumption behing "forcing" is that the CPU is not still in
465 an old RCU read-side critical section, but has not yet responded
466 for some other reason.
467
468o "nn" is the number of times that this CPU needed nothing. Alert
469 readers will note that the rcu "nn" number for a given CPU very
470 closely matches the rcu_bh "np" number for that same CPU. This
471 is due to short-circuit evaluation in rcu_pending().
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
index 6fc656035925..561826f82093 100644
--- a/Documentation/SM501.txt
+++ b/Documentation/SM501.txt
@@ -5,7 +5,7 @@ Copyright 2006, 2007 Simtec Electronics
5 5
6The Silicon Motion SM501 multimedia companion chip is a multifunction device 6The Silicon Motion SM501 multimedia companion chip is a multifunction device
7which may provide numerous interfaces including USB host controller USB gadget, 7which may provide numerous interfaces including USB host controller USB gadget,
8Asyncronous Serial ports, Audio functions and a dual display video interface. 8asynchronous serial ports, audio functions, and a dual display video interface.
9The device may be connected by PCI or local bus with varying functions enabled. 9The device may be connected by PCI or local bus with varying functions enabled.
10 10
11Core 11Core
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
index 989c2fcd8111..34614b4c708e 100644
--- a/Documentation/Smack.txt
+++ b/Documentation/Smack.txt
@@ -184,14 +184,17 @@ length. Single character labels using special characters, that being anything
184other than a letter or digit, are reserved for use by the Smack development 184other than a letter or digit, are reserved for use by the Smack development
185team. Smack labels are unstructured, case sensitive, and the only operation 185team. Smack labels are unstructured, case sensitive, and the only operation
186ever performed on them is comparison for equality. Smack labels cannot 186ever performed on them is comparison for equality. Smack labels cannot
187contain unprintable characters or the "/" (slash) character. 187contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
188(quote) and '"' (double-quote) characters.
189Smack labels cannot begin with a '-', which is reserved for special options.
188 190
189There are some predefined labels: 191There are some predefined labels:
190 192
191 _ Pronounced "floor", a single underscore character. 193 _ Pronounced "floor", a single underscore character.
192 ^ Pronounced "hat", a single circumflex character. 194 ^ Pronounced "hat", a single circumflex character.
193 * Pronounced "star", a single asterisk character. 195 * Pronounced "star", a single asterisk character.
194 ? Pronounced "huh", a single question mark character. 196 ? Pronounced "huh", a single question mark character.
197 @ Pronounced "Internet", a single at sign character.
195 198
196Every task on a Smack system is assigned a label. System tasks, such as 199Every task on a Smack system is assigned a label. System tasks, such as
197init(8) and systems daemons, are run with the floor ("_") label. User tasks 200init(8) and systems daemons, are run with the floor ("_") label. User tasks
@@ -412,6 +415,36 @@ sockets.
412 A privileged program may set this to match the label of another 415 A privileged program may set this to match the label of another
413 task with which it hopes to communicate. 416 task with which it hopes to communicate.
414 417
418Smack Netlabel Exceptions
419
420You will often find that your labeled application has to talk to the outside,
421unlabeled world. To do this there's a special file /smack/netlabel where you can
422add some exceptions in the form of :
423@IP1 LABEL1 or
424@IP2/MASK LABEL2
425
426It means that your application will have unlabeled access to @IP1 if it has
427write access on LABEL1, and access to the subnet @IP2/MASK if it has write
428access on LABEL2.
429
430Entries in the /smack/netlabel file are matched by longest mask first, like in
431classless IPv4 routing.
432
433A special label '@' and an option '-CIPSO' can be used there :
434@ means Internet, any application with any label has access to it
435-CIPSO means standard CIPSO networking
436
437If you don't know what CIPSO is and don't plan to use it, you can just do :
438echo 127.0.0.1 -CIPSO > /smack/netlabel
439echo 0.0.0.0/0 @ > /smack/netlabel
440
441If you use CIPSO on your 192.168.0.0/16 local network and need also unlabeled
442Internet access, you can have :
443echo 127.0.0.1 -CIPSO > /smack/netlabel
444echo 192.168.0.0/16 -CIPSO > /smack/netlabel
445echo 0.0.0.0/0 @ > /smack/netlabel
446
447
415Writing Applications for Smack 448Writing Applications for Smack
416 449
417There are three sorts of applications that will run on a Smack system. How an 450There are three sorts of applications that will run on a Smack system. How an
@@ -491,3 +524,18 @@ Smack supports some mount options:
491 524
492These mount options apply to all file system types. 525These mount options apply to all file system types.
493 526
527Smack auditing
528
529If you want Smack auditing of security events, you need to set CONFIG_AUDIT
530in your kernel configuration.
531By default, all denied events will be audited. You can change this behavior by
532writing a single character to the /smack/logging file :
5330 : no logging
5341 : log denied (default)
5352 : log accepted
5363 : log denied & accepted
537
538Events are logged as 'key=value' pairs, for each event you at least will get
539the subjet, the object, the rights requested, the action, the kernel function
540that triggered the event, plus other pairs depending on the type of event
541audited.
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index ac5e0b2f1097..78a9168ff377 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -54,7 +54,7 @@ kernel patches.
54 CONFIG_PREEMPT. 54 CONFIG_PREEMPT.
55 55
5614: If the patch affects IO/Disk, etc: has been tested with and without 5614: If the patch affects IO/Disk, etc: has been tested with and without
57 CONFIG_LBD. 57 CONFIG_LBDAF.
58 58
5915: All codepaths have been exercised with all lockdep features enabled. 5915: All codepaths have been exercised with all lockdep features enabled.
60 60
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index f309d3c6221c..5c555a8b39e5 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -91,6 +91,10 @@ Be as specific as possible. The WORST descriptions possible include
91things like "update driver X", "bug fix for driver X", or "this patch 91things like "update driver X", "bug fix for driver X", or "this patch
92includes updates for subsystem X. Please apply." 92includes updates for subsystem X. Please apply."
93 93
94The maintainer will thank you if you write your patch description in a
95form which can be easily pulled into Linux's source code management
96system, git, as a "commit log". See #15, below.
97
94If your description starts to get long, that's a sign that you probably 98If your description starts to get long, that's a sign that you probably
95need to split up your patch. See #3, next. 99need to split up your patch. See #3, next.
96 100
@@ -183,8 +187,9 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS
183copy the maintainer when you change their code. 187copy the maintainer when you change their code.
184 188
185For small patches you may want to CC the Trivial Patch Monkey 189For small patches you may want to CC the Trivial Patch Monkey
186trivial@kernel.org managed by Jesper Juhl; which collects "trivial" 190trivial@kernel.org which collects "trivial" patches. Have a look
187patches. Trivial patches must qualify for one of the following rules: 191into the MAINTAINERS file for its current manager.
192Trivial patches must qualify for one of the following rules:
188 Spelling fixes in documentation 193 Spelling fixes in documentation
189 Spelling fixes which could break grep(1) 194 Spelling fixes which could break grep(1)
190 Warning fixes (cluttering with useless warnings is bad) 195 Warning fixes (cluttering with useless warnings is bad)
@@ -196,7 +201,6 @@ patches. Trivial patches must qualify for one of the following rules:
196 since people copy, as long as it's trivial) 201 since people copy, as long as it's trivial)
197 Any fix by the author/maintainer of the file (ie. patch monkey 202 Any fix by the author/maintainer of the file (ie. patch monkey
198 in re-transmission mode) 203 in re-transmission mode)
199URL: <http://www.kernel.org/pub/linux/kernel/people/juhl/trivial/>
200 204
201 205
202 206
@@ -405,7 +409,14 @@ person it names. This tag documents that potentially interested parties
405have been included in the discussion 409have been included in the discussion
406 410
407 411
40814) Using Tested-by: and Reviewed-by: 41214) Using Reported-by:, Tested-by: and Reviewed-by:
413
414If this patch fixes a problem reported by somebody else, consider adding a
415Reported-by: tag to credit the reporter for their contribution. Please
416note that this tag should not be added without the reporter's permission,
417especially if the problem was not reported in a public forum. That said,
418if we diligently credit our bug reporters, they will, hopefully, be
419inspired to help us again in the future.
409 420
410A Tested-by: tag indicates that the patch has been successfully tested (in 421A Tested-by: tag indicates that the patch has been successfully tested (in
411some environment) by the person named. This tag informs maintainers that 422some environment) by the person named. This tag informs maintainers that
@@ -444,7 +455,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to
444reviewers and to inform maintainers of the degree of review which has been 455reviewers and to inform maintainers of the degree of review which has been
445done on the patch. Reviewed-by: tags, when supplied by reviewers known to 456done on the patch. Reviewed-by: tags, when supplied by reviewers known to
446understand the subject area and to perform thorough reviews, will normally 457understand the subject area and to perform thorough reviews, will normally
447increase the liklihood of your patch getting into the kernel. 458increase the likelihood of your patch getting into the kernel.
448 459
449 460
45015) The canonical patch format 46115) The canonical patch format
@@ -485,12 +496,33 @@ phrase" should not be a filename. Do not use the same "summary
485phrase" for every patch in a whole patch series (where a "patch 496phrase" for every patch in a whole patch series (where a "patch
486series" is an ordered sequence of multiple, related patches). 497series" is an ordered sequence of multiple, related patches).
487 498
488Bear in mind that the "summary phrase" of your email becomes 499Bear in mind that the "summary phrase" of your email becomes a
489a globally-unique identifier for that patch. It propagates 500globally-unique identifier for that patch. It propagates all the way
490all the way into the git changelog. The "summary phrase" may 501into the git changelog. The "summary phrase" may later be used in
491later be used in developer discussions which refer to the patch. 502developer discussions which refer to the patch. People will want to
492People will want to google for the "summary phrase" to read 503google for the "summary phrase" to read discussion regarding that
493discussion regarding that patch. 504patch. It will also be the only thing that people may quickly see
505when, two or three months later, they are going through perhaps
506thousands of patches using tools such as "gitk" or "git log
507--oneline".
508
509For these reasons, the "summary" must be no more than 70-75
510characters, and it must describe both what the patch changes, as well
511as why the patch might be necessary. It is challenging to be both
512succinct and descriptive, but that is what a well-written summary
513should do.
514
515The "summary phrase" may be prefixed by tags enclosed in square
516brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
517considered part of the summary phrase, but describe how the patch
518should be treated. Common tags might include a version descriptor if
519the multiple versions of the patch have been sent out in response to
520comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
521comments. If there are four patches in a patch series the individual
522patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
523that developers understand the order in which the patches should be
524applied and that they have reviewed or applied all of the patches in
525the patch series.
494 526
495A couple of example Subjects: 527A couple of example Subjects:
496 528
@@ -510,19 +542,31 @@ the patch author in the changelog.
510The explanation body will be committed to the permanent source 542The explanation body will be committed to the permanent source
511changelog, so should make sense to a competent reader who has long 543changelog, so should make sense to a competent reader who has long
512since forgotten the immediate details of the discussion that might 544since forgotten the immediate details of the discussion that might
513have led to this patch. 545have led to this patch. Including symptoms of the failure which the
546patch addresses (kernel log messages, oops messages, etc.) is
547especially useful for people who might be searching the commit logs
548looking for the applicable patch. If a patch fixes a compile failure,
549it may not be necessary to include _all_ of the compile failures; just
550enough that it is likely that someone searching for the patch can find
551it. As in the "summary phrase", it is important to be both succinct as
552well as descriptive.
514 553
515The "---" marker line serves the essential purpose of marking for patch 554The "---" marker line serves the essential purpose of marking for patch
516handling tools where the changelog message ends. 555handling tools where the changelog message ends.
517 556
518One good use for the additional comments after the "---" marker is for 557One good use for the additional comments after the "---" marker is for
519a diffstat, to show what files have changed, and the number of inserted 558a diffstat, to show what files have changed, and the number of
520and deleted lines per file. A diffstat is especially useful on bigger 559inserted and deleted lines per file. A diffstat is especially useful
521patches. Other comments relevant only to the moment or the maintainer, 560on bigger patches. Other comments relevant only to the moment or the
522not suitable for the permanent changelog, should also go here. 561maintainer, not suitable for the permanent changelog, should also go
523Use diffstat options "-p 1 -w 70" so that filenames are listed from the 562here. A good example of such comments might be "patch changelogs"
524top of the kernel source tree and don't use too much horizontal space 563which describe what has changed between the v1 and v2 version of the
525(easily fit in 80 columns, maybe with some indentation). 564patch.
565
566If you are going to include a diffstat after the "---" marker, please
567use diffstat options "-p 1 -w 70" so that filenames are listed from
568the top of the kernel source tree and don't use too much horizontal
569space (easily fit in 80 columns, maybe with some indentation).
526 570
527See more details on the proper patch format in the following 571See more details on the proper patch format in the following
528references. 572references.
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 7ea231172c85..aa73e72fd793 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -246,7 +246,8 @@ void print_ioacct(struct taskstats *t)
246 246
247int main(int argc, char *argv[]) 247int main(int argc, char *argv[])
248{ 248{
249 int c, rc, rep_len, aggr_len, len2, cmd_type; 249 int c, rc, rep_len, aggr_len, len2;
250 int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
250 __u16 id; 251 __u16 id;
251 __u32 mypid; 252 __u32 mypid;
252 253
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
index ea7ccfc4b274..948c8718d967 100644
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
@@ -51,7 +51,7 @@ PIN Numbers
51----------- 51-----------
52 52
53 Each pin has an unique number associated with it in regs-gpio.h, 53 Each pin has an unique number associated with it in regs-gpio.h,
54 eg S3C2410_GPA0 or S3C2410_GPF1. These defines are used to tell 54 eg S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
55 the GPIO functions which pin is to be used. 55 the GPIO functions which pin is to be used.
56 56
57 57
@@ -65,11 +65,11 @@ Configuring a pin
65 65
66 Eg: 66 Eg:
67 67
68 s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0); 68 s3c2410_gpio_cfgpin(S3C2410_GPA(0), S3C2410_GPA0_ADDR0);
69 s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1); 69 s3c2410_gpio_cfgpin(S3C2410_GPE(8), S3C2410_GPE8_SDDAT1);
70 70
71 which would turn GPA0 into the lowest Address line A0, and set 71 which would turn GPA(0) into the lowest Address line A0, and set
72 GPE8 to be connected to the SDIO/MMC controller's SDDAT1 line. 72 GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
73 73
74 74
75Reading the current configuration 75Reading the current configuration
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
index 0dab6e32c130..a30fe510572b 100644
--- a/Documentation/arm/Samsung-S3C24XX/Suspend.txt
+++ b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
@@ -40,13 +40,13 @@ Resuming
40Machine Support 40Machine Support
41--------------- 41---------------
42 42
43 The machine specific functions must call the s3c2410_pm_init() function 43 The machine specific functions must call the s3c_pm_init() function
44 to say that its bootloader is capable of resuming. This can be as 44 to say that its bootloader is capable of resuming. This can be as
45 simple as adding the following to the machine's definition: 45 simple as adding the following to the machine's definition:
46 46
47 INITMACHINE(s3c2410_pm_init) 47 INITMACHINE(s3c_pm_init)
48 48
49 A board can do its own setup before calling s3c2410_pm_init, if it 49 A board can do its own setup before calling s3c_pm_init, if it
50 needs to setup anything else for power management support. 50 needs to setup anything else for power management support.
51 51
52 There is currently no support for over-riding the default method of 52 There is currently no support for over-riding the default method of
@@ -74,7 +74,7 @@ statuc void __init machine_init(void)
74 74
75 enable_irq_wake(IRQ_EINT0); 75 enable_irq_wake(IRQ_EINT0);
76 76
77 s3c2410_pm_init(); 77 s3c_pm_init();
78} 78}
79 79
80 80
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index dc6045577a8b..43cb1004d35f 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -29,7 +29,14 @@ ffff0000 ffff0fff CPU vector page.
29 CPU supports vector relocation (control 29 CPU supports vector relocation (control
30 register V bit.) 30 register V bit.)
31 31
32ffc00000 fffeffff DMA memory mapping region. Memory returned 32fffe0000 fffeffff XScale cache flush area. This is used
33 in proc-xscale.S to flush the whole data
34 cache. Free for other usage on non-XScale.
35
36fff00000 fffdffff Fixmap mapping region. Addresses provided
37 by fix_to_virt() will be located here.
38
39ffc00000 ffefffff DMA memory mapping region. Memory returned
33 by the dma_alloc_xxx functions will be 40 by the dma_alloc_xxx functions will be
34 dynamically mapped here. 41 dynamically mapped here.
35 42
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 4ef245010457..396bec3b74ed 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -229,10 +229,10 @@ kernel. It is the use of atomic counters to implement reference
229counting, and it works such that once the counter falls to zero it can 229counting, and it works such that once the counter falls to zero it can
230be guaranteed that no other entity can be accessing the object: 230be guaranteed that no other entity can be accessing the object:
231 231
232static void obj_list_add(struct obj *obj) 232static void obj_list_add(struct obj *obj, struct list_head *head)
233{ 233{
234 obj->active = 1; 234 obj->active = 1;
235 list_add(&obj->list); 235 list_add(&obj->list, head);
236} 236}
237 237
238static void obj_list_del(struct obj *obj) 238static void obj_list_del(struct obj *obj)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index ecad6ee75705..8d2158a1c6aa 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
186do not have a corresponding kernel virtual address space mapping) and 186do not have a corresponding kernel virtual address space mapping) and
187low-memory pages. 187low-memory pages.
188 188
189Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion 189Note: Please refer to Documentation/DMA-mapping.txt for a discussion
190on PCI high mem DMA aspects and mapping of scatter gather lists, and support 190on PCI high mem DMA aspects and mapping of scatter gather lists, and support
191for 64 bit PCI. 191for 64 bit PCI.
192 192
@@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers.
1040iii. Plugging the queue to batch requests in anticipation of opportunities for 1040iii. Plugging the queue to batch requests in anticipation of opportunities for
1041 merge/sort optimizations 1041 merge/sort optimizations
1042 1042
1043This is just the same as in 2.4 so far, though per-device unplugging
1044support is anticipated for 2.5. Also with a priority-based i/o scheduler,
1045such decisions could be based on request priorities.
1046
1047Plugging is an approach that the current i/o scheduling algorithm resorts to so 1043Plugging is an approach that the current i/o scheduling algorithm resorts to so
1048that it collects up enough requests in the queue to be able to take 1044that it collects up enough requests in the queue to be able to take
1049advantage of the sorting/merging logic in the elevator. If the 1045advantage of the sorting/merging logic in the elevator. If the
1050queue is empty when a request comes in, then it plugs the request queue 1046queue is empty when a request comes in, then it plugs the request queue
1051(sort of like plugging the bottom of a vessel to get fluid to build up) 1047(sort of like plugging the bath tub of a vessel to get fluid to build up)
1052till it fills up with a few more requests, before starting to service 1048till it fills up with a few more requests, before starting to service
1053the requests. This provides an opportunity to merge/sort the requests before 1049the requests. This provides an opportunity to merge/sort the requests before
1054passing them down to the device. There are various conditions when the queue is 1050passing them down to the device. There are various conditions when the queue is
1055unplugged (to open up the flow again), either through a scheduled task or 1051unplugged (to open up the flow again), either through a scheduled task or
1056could be on demand. For example wait_on_buffer sets the unplugging going 1052could be on demand. For example wait_on_buffer sets the unplugging going
1057(by running tq_disk) so the read gets satisfied soon. So in the read case, 1053through sync_buffer() running blk_run_address_space(mapping). Or the caller
1058the queue gets explicitly unplugged as part of waiting for completion, 1054can do it explicity through blk_unplug(bdev). So in the read case,
1059in fact all queues get unplugged as a side-effect. 1055the queue gets explicitly unplugged as part of waiting for completion on that
1056buffer. For page driven IO, the address space ->sync_page() takes care of
1057doing the blk_run_address_space().
1060 1058
1061Aside: 1059Aside:
1062 This is kind of controversial territory, as it's not clear if plugging is 1060 This is kind of controversial territory, as it's not clear if plugging is
@@ -1067,11 +1065,6 @@ Aside:
1067 multi-page bios being queued in one shot, we may not need to wait to merge 1065 multi-page bios being queued in one shot, we may not need to wait to merge
1068 a big request from the broken up pieces coming by. 1066 a big request from the broken up pieces coming by.
1069 1067
1070 Per-queue granularity unplugging (still a Todo) may help reduce some of the
1071 concerns with just a single tq_disk flush approach. Something like
1072 blk_kick_queue() to unplug a specific queue (right away ?)
1073 or optionally, all queues, is in the plan.
1074
10754.4 I/O contexts 10684.4 I/O contexts
1076I/O contexts provide a dynamically allocated per process data area. They may 1069I/O contexts provide a dynamically allocated per process data area. They may
1077be used in I/O schedulers, and in the block layer (could be used for IO statis, 1070be used in I/O schedulers, and in the block layer (could be used for IO statis,
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index 72576769e0f4..2d82c80322cb 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -58,7 +58,7 @@ same criteria as reads.
58front_merges (bool) 58front_merges (bool)
59------------ 59------------
60 60
61Sometimes it happens that a request enters the io scheduler that is contigious 61Sometimes it happens that a request enters the io scheduler that is contiguous
62with a request that is already on the queue. Either it fits in the back of that 62with a request that is already on the queue. Either it fits in the back of that
63request, or it fits at the front. That is called either a back merge candidate 63request, or it fits at the front. That is called either a back merge candidate
64or a front merge candidate. Due to the way files are typically laid out, 64or a front merge candidate. Due to the way files are typically laid out,
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
index 634c952e1964..d5af3f630814 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.txt
@@ -35,9 +35,3 @@ noop anticipatory deadline [cfq]
35# echo anticipatory > /sys/block/hda/queue/scheduler 35# echo anticipatory > /sys/block/hda/queue/scheduler
36# cat /sys/block/hda/queue/scheduler 36# cat /sys/block/hda/queue/scheduler
37noop [anticipatory] deadline cfq 37noop [anticipatory] deadline cfq
38
39Each io queue has a set of io scheduler tunables associated with it. These
40tunables control how the io scheduler works. You can find these entries
41in:
42
43/sys/block/<device>/queue/iosched
diff --git a/Documentation/blockdev/00-INDEX b/Documentation/blockdev/00-INDEX
index 86f054c47013..c08df56dd91b 100644
--- a/Documentation/blockdev/00-INDEX
+++ b/Documentation/blockdev/00-INDEX
@@ -8,6 +8,8 @@ cpqarray.txt
8 - info on using Compaq's SMART2 Intelligent Disk Array Controllers. 8 - info on using Compaq's SMART2 Intelligent Disk Array Controllers.
9floppy.txt 9floppy.txt
10 - notes and driver options for the floppy disk driver. 10 - notes and driver options for the floppy disk driver.
11mflash.txt
12 - info on mGine m(g)flash driver for linux.
11nbd.txt 13nbd.txt
12 - info on a TCP implementation of a network block device. 14 - info on a TCP implementation of a network block device.
13paride.txt 15paride.txt
diff --git a/Documentation/blockdev/mflash.txt b/Documentation/blockdev/mflash.txt
new file mode 100644
index 000000000000..1f610ecf698a
--- /dev/null
+++ b/Documentation/blockdev/mflash.txt
@@ -0,0 +1,84 @@
1This document describes m[g]flash support in linux.
2
3Contents
4 1. Overview
5 2. Reserved area configuration
6 3. Example of mflash platform driver registration
7
81. Overview
9
10Mflash and gflash are embedded flash drive. The only difference is mflash is
11MCP(Multi Chip Package) device. These two device operate exactly same way.
12So the rest mflash repersents mflash and gflash altogether.
13
14Internally, mflash has nand flash and other hardware logics and supports
152 different operation (ATA, IO) modes. ATA mode doesn't need any new
16driver and currently works well under standard IDE subsystem. Actually it's
17one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have
18IDE interface.
19
20Followings are brief descriptions about IO mode.
21A. IO mode based on ATA protocol and uses some custom command. (read confirm,
22write confirm)
23B. IO mode uses SRAM bus interface.
24C. IO mode supports 4kB boot area, so host can boot from mflash.
25
262. Reserved area configuration
27If host boot from mflash, usually needs raw area for boot loader image. All of
28the mflash's block device operation will be taken this value as start offset.
29Note that boot loader's size of reserved area and kernel configuration value
30must be same.
31
323. Example of mflash platform driver registration
33Working mflash is very straight forward. Adding platform device stuff to board
34configuration file is all. Here is some pseudo example.
35
36static struct mg_drv_data mflash_drv_data = {
37 /* If you want to polling driver set to 1 */
38 .use_polling = 0,
39 /* device attribution */
40 .dev_attr = MG_BOOT_DEV
41};
42
43static struct resource mg_mflash_rsc[] = {
44 /* Base address of mflash */
45 [0] = {
46 .start = 0x08000000,
47 .end = 0x08000000 + SZ_64K - 1,
48 .flags = IORESOURCE_MEM
49 },
50 /* mflash interrupt pin */
51 [1] = {
52 .start = IRQ_GPIO(84),
53 .end = IRQ_GPIO(84),
54 .flags = IORESOURCE_IRQ
55 },
56 /* mflash reset pin */
57 [2] = {
58 .start = 43,
59 .end = 43,
60 .name = MG_RST_PIN,
61 .flags = IORESOURCE_IO
62 },
63 /* mflash reset-out pin
64 * If you use mflash as storage device (i.e. other than MG_BOOT_DEV),
65 * should assign this */
66 [3] = {
67 .start = 51,
68 .end = 51,
69 .name = MG_RSTOUT_PIN,
70 .flags = IORESOURCE_IO
71 }
72};
73
74static struct platform_device mflash_dev = {
75 .name = MG_DEV_NAME,
76 .id = -1,
77 .dev = {
78 .platform_data = &mflash_drv_data,
79 },
80 .num_resources = ARRAY_SIZE(mg_mflash_rsc),
81 .resource = mg_mflash_rsc
82};
83
84platform_device_register(&mflash_dev);
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
index 000b0fbdc105..d0d042c2fd5e 100644
--- a/Documentation/braille-console.txt
+++ b/Documentation/braille-console.txt
@@ -27,7 +27,7 @@ parameter.
27 27
28For simplicity, only one braille console can be enabled, other uses of 28For simplicity, only one braille console can be enabled, other uses of
29console=brl,... will be discarded. Also note that it does not interfere with 29console=brl,... will be discarded. Also note that it does not interfere with
30the console selection mecanism described in serial-console.txt 30the console selection mechanism described in serial-console.txt
31 31
32For now, only the VisioBraille device is supported. 32For now, only the VisioBraille device is supported.
33 33
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt
index cf1f8126991c..1c407778c8b2 100644
--- a/Documentation/cdrom/packet-writing.txt
+++ b/Documentation/cdrom/packet-writing.txt
@@ -117,7 +117,7 @@ Using the pktcdvd debugfs interface
117 117
118To read pktcdvd device infos in human readable form, do: 118To read pktcdvd device infos in human readable form, do:
119 119
120 # cat /debug/pktcdvd/pktcdvd[0-7]/info 120 # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
121 121
122For a description of the debugfs interface look into the file: 122For a description of the debugfs interface look into the file:
123 123
diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX
new file mode 100644
index 000000000000..3f58fa3d6d00
--- /dev/null
+++ b/Documentation/cgroups/00-INDEX
@@ -0,0 +1,18 @@
100-INDEX
2 - this file
3cgroups.txt
4 - Control Groups definition, implementation details, examples and API.
5cpuacct.txt
6 - CPU Accounting Controller; account CPU usage for groups of tasks.
7cpusets.txt
8 - documents the cpusets feature; assign CPUs and Mem to a set of tasks.
9devices.txt
10 - Device Whitelist Controller; description, interface and security.
11freezer-subsystem.txt
12 - checkpointing; rationale to not use signals, interface.
13memcg_test.txt
14 - Memory Resource Controller; implementation details.
15memory.txt
16 - Memory Resource Controller; design, accounting, interface, testing.
17resource_counter.txt
18 - Resource Counter API.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 93feb8444489..6eb1a97e88ce 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -56,7 +56,7 @@ hierarchy, and a set of subsystems; each subsystem has system-specific
56state attached to each cgroup in the hierarchy. Each hierarchy has 56state attached to each cgroup in the hierarchy. Each hierarchy has
57an instance of the cgroup virtual filesystem associated with it. 57an instance of the cgroup virtual filesystem associated with it.
58 58
59At any one time there may be multiple active hierachies of task 59At any one time there may be multiple active hierarchies of task
60cgroups. Each hierarchy is a partition of all tasks in the system. 60cgroups. Each hierarchy is a partition of all tasks in the system.
61 61
62User level code may create and destroy cgroups by name in an 62User level code may create and destroy cgroups by name in an
@@ -124,10 +124,10 @@ following lines:
124 / \ 124 / \
125 Prof (15%) students (5%) 125 Prof (15%) students (5%)
126 126
127Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go 127Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd go
128into NFS network class. 128into NFS network class.
129 129
130At the same time firefox/lynx will share an appropriate CPU/Memory class 130At the same time Firefox/Lynx will share an appropriate CPU/Memory class
131depending on who launched it (prof/student). 131depending on who launched it (prof/student).
132 132
133With the ability to classify tasks differently for different resources 133With the ability to classify tasks differently for different resources
@@ -325,7 +325,7 @@ and then start a subshell 'sh' in that cgroup:
325Creating, modifying, using the cgroups can be done through the cgroup 325Creating, modifying, using the cgroups can be done through the cgroup
326virtual filesystem. 326virtual filesystem.
327 327
328To mount a cgroup hierarchy will all available subsystems, type: 328To mount a cgroup hierarchy with all available subsystems, type:
329# mount -t cgroup xxx /dev/cgroup 329# mount -t cgroup xxx /dev/cgroup
330 330
331The "xxx" is not interpreted by the cgroup code, but will appear in 331The "xxx" is not interpreted by the cgroup code, but will appear in
@@ -333,12 +333,23 @@ The "xxx" is not interpreted by the cgroup code, but will appear in
333 333
334To mount a cgroup hierarchy with just the cpuset and numtasks 334To mount a cgroup hierarchy with just the cpuset and numtasks
335subsystems, type: 335subsystems, type:
336# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup 336# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
337 337
338To change the set of subsystems bound to a mounted hierarchy, just 338To change the set of subsystems bound to a mounted hierarchy, just
339remount with different options: 339remount with different options:
340# mount -o remount,cpuset,ns hier1 /dev/cgroup
340 341
341# mount -o remount,cpuset,ns /dev/cgroup 342Now memory is removed from the hierarchy and ns is added.
343
344Note this will add ns to the hierarchy but won't remove memory or
345cpuset, because the new options are appended to the old ones:
346# mount -o remount,ns /dev/cgroup
347
348To Specify a hierarchy's release_agent:
349# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
350 xxx /dev/cgroup
351
352Note that specifying 'release_agent' more than once will return failure.
342 353
343Note that changing the set of subsystems is currently only supported 354Note that changing the set of subsystems is currently only supported
344when the hierarchy consists of a single (root) cgroup. Supporting 355when the hierarchy consists of a single (root) cgroup. Supporting
@@ -349,6 +360,11 @@ Then under /dev/cgroup you can find a tree that corresponds to the
349tree of the cgroups in the system. For instance, /dev/cgroup 360tree of the cgroups in the system. For instance, /dev/cgroup
350is the cgroup that holds the whole system. 361is the cgroup that holds the whole system.
351 362
363If you want to change the value of release_agent:
364# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
365
366It can also be changed via remount.
367
352If you want to create a new cgroup under /dev/cgroup: 368If you want to create a new cgroup under /dev/cgroup:
353# cd /dev/cgroup 369# cd /dev/cgroup
354# mkdir my_cgroup 370# mkdir my_cgroup
@@ -476,11 +492,13 @@ cgroup->parent is still valid. (Note - can also be called for a
476newly-created cgroup if an error occurs after this subsystem's 492newly-created cgroup if an error occurs after this subsystem's
477create() method has been called for the new cgroup). 493create() method has been called for the new cgroup).
478 494
479void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); 495int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
480 496
481Called before checking the reference count on each subsystem. This may 497Called before checking the reference count on each subsystem. This may
482be useful for subsystems which have some extra references even if 498be useful for subsystems which have some extra references even if
483there are not tasks in the cgroup. 499there are not tasks in the cgroup. If pre_destroy() returns error code,
500rmdir() will fail with it. From this behavior, pre_destroy() can be
501called multiple times against a cgroup.
484 502
485int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 503int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
486 struct task_struct *task) 504 struct task_struct *task)
@@ -521,7 +539,7 @@ always handled well.
521void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) 539void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
522(cgroup_mutex held by caller) 540(cgroup_mutex held by caller)
523 541
524Called at the end of cgroup_clone() to do any paramater 542Called at the end of cgroup_clone() to do any parameter
525initialization which might be required before a task could attach. For 543initialization which might be required before a task could attach. For
526example in cpusets, no task may attach before 'cpus' and 'mems' are set 544example in cpusets, no task may attach before 'cpus' and 'mems' are set
527up. 545up.
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index bb775fbe43d7..8b930946c52a 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -30,3 +30,21 @@ The above steps create a new group g1 and move the current shell
30process (bash) into it. CPU time consumed by this bash and its children 30process (bash) into it. CPU time consumed by this bash and its children
31can be obtained from g1/cpuacct.usage and the same is accumulated in 31can be obtained from g1/cpuacct.usage and the same is accumulated in
32/cgroups/cpuacct.usage also. 32/cgroups/cpuacct.usage also.
33
34cpuacct.stat file lists a few statistics which further divide the
35CPU time obtained by the cgroup into user and system times. Currently
36the following statistics are supported:
37
38user: Time spent by tasks of the cgroup in user mode.
39system: Time spent by tasks of the cgroup in kernel mode.
40
41user and system are in USER_HZ unit.
42
43cpuacct controller uses percpu_counter interface to collect user and
44system times. This has two side effects:
45
46- It is theoretically possible to see wrong values for user and system times.
47 This is because percpu_counter_read() on 32bit systems isn't safe
48 against concurrent writes.
49- It is possible to see slightly outdated values for user and system times
50 due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 0611e9528c7c..f9ca389dddf4 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -131,7 +131,7 @@ Cpusets extends these two mechanisms as follows:
131 - The hierarchy of cpusets can be mounted at /dev/cpuset, for 131 - The hierarchy of cpusets can be mounted at /dev/cpuset, for
132 browsing and manipulation from user space. 132 browsing and manipulation from user space.
133 - A cpuset may be marked exclusive, which ensures that no other 133 - A cpuset may be marked exclusive, which ensures that no other
134 cpuset (except direct ancestors and descendents) may contain 134 cpuset (except direct ancestors and descendants) may contain
135 any overlapping CPUs or Memory Nodes. 135 any overlapping CPUs or Memory Nodes.
136 - You can list all the tasks (by pid) attached to any cpuset. 136 - You can list all the tasks (by pid) attached to any cpuset.
137 137
@@ -226,7 +226,7 @@ nodes with memory--using the cpuset_track_online_nodes() hook.
226-------------------------------- 226--------------------------------
227 227
228If a cpuset is cpu or mem exclusive, no other cpuset, other than 228If a cpuset is cpu or mem exclusive, no other cpuset, other than
229a direct ancestor or descendent, may share any of the same CPUs or 229a direct ancestor or descendant, may share any of the same CPUs or
230Memory Nodes. 230Memory Nodes.
231 231
232A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", 232A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
@@ -427,7 +427,7 @@ child cpusets have this flag enabled.
427When doing this, you don't usually want to leave any unpinned tasks in 427When doing this, you don't usually want to leave any unpinned tasks in
428the top cpuset that might use non-trivial amounts of CPU, as such tasks 428the top cpuset that might use non-trivial amounts of CPU, as such tasks
429may be artificially constrained to some subset of CPUs, depending on 429may be artificially constrained to some subset of CPUs, depending on
430the particulars of this flag setting in descendent cpusets. Even if 430the particulars of this flag setting in descendant cpusets. Even if
431such a task could use spare CPU cycles in some other CPUs, the kernel 431such a task could use spare CPU cycles in some other CPUs, the kernel
432scheduler might not consider the possibility of load balancing that 432scheduler might not consider the possibility of load balancing that
433task to that underused CPU. 433task to that underused CPU.
@@ -531,9 +531,9 @@ be idle.
531 531
532Of course it takes some searching cost to find movable tasks and/or 532Of course it takes some searching cost to find movable tasks and/or
533idle CPUs, the scheduler might not search all CPUs in the domain 533idle CPUs, the scheduler might not search all CPUs in the domain
534everytime. In fact, in some architectures, the searching ranges on 534every time. In fact, in some architectures, the searching ranges on
535events are limited in the same socket or node where the CPU locates, 535events are limited in the same socket or node where the CPU locates,
536while the load balance on tick searchs all. 536while the load balance on tick searches all.
537 537
538For example, assume CPU Z is relatively far from CPU X. Even if CPU Z 538For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
539is idle while CPU X and the siblings are busy, scheduler can't migrate 539is idle while CPU X and the siblings are busy, scheduler can't migrate
@@ -601,7 +601,7 @@ its new cpuset, then the task will continue to use whatever subset
601of MPOL_BIND nodes are still allowed in the new cpuset. If the task 601of MPOL_BIND nodes are still allowed in the new cpuset. If the task
602was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed 602was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
603in the new cpuset, then the task will be essentially treated as if it 603in the new cpuset, then the task will be essentially treated as if it
604was MPOL_BIND bound to the new cpuset (even though its numa placement, 604was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
605as queried by get_mempolicy(), doesn't change). If a task is moved 605as queried by get_mempolicy(), doesn't change). If a task is moved
606from one cpuset to another, then the kernel will adjust the tasks 606from one cpuset to another, then the kernel will adjust the tasks
607memory placement, as above, the next time that the kernel attempts 607memory placement, as above, the next time that the kernel attempts
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
index 7cc6e6a60672..57ca4c89fe5c 100644
--- a/Documentation/cgroups/devices.txt
+++ b/Documentation/cgroups/devices.txt
@@ -42,7 +42,7 @@ suffice, but we can decide the best way to adequately restrict
42movement as people get some experience with this. We may just want 42movement as people get some experience with this. We may just want
43to require CAP_SYS_ADMIN, which at least is a separate bit from 43to require CAP_SYS_ADMIN, which at least is a separate bit from
44CAP_MKNOD. We may want to just refuse moving to a cgroup which 44CAP_MKNOD. We may want to just refuse moving to a cgroup which
45isn't a descendent of the current one. Or we may want to use 45isn't a descendant of the current one. Or we may want to use
46CAP_MAC_ADMIN, since we really are trying to lock down root. 46CAP_MAC_ADMIN, since we really are trying to lock down root.
47 47
48CAP_SYS_ADMIN is needed to modify the whitelist or move another 48CAP_SYS_ADMIN is needed to modify the whitelist or move another
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 523a9c16c400..72db89ed0609 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -1,5 +1,5 @@
1Memory Resource Controller(Memcg) Implementation Memo. 1Memory Resource Controller(Memcg) Implementation Memo.
2Last Updated: 2009/1/19 2Last Updated: 2009/1/20
3Base Kernel Version: based on 2.6.29-rc2. 3Base Kernel Version: based on 2.6.29-rc2.
4 4
5Because VM is getting complex (one of reasons is memcg...), memcg's behavior 5Because VM is getting complex (one of reasons is memcg...), memcg's behavior
@@ -356,7 +356,25 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
356 (Shell-B) 356 (Shell-B)
357 # move all tasks in /cgroup/test to /cgroup 357 # move all tasks in /cgroup/test to /cgroup
358 # /sbin/swapoff -a 358 # /sbin/swapoff -a
359 # rmdir /test/cgroup 359 # rmdir /cgroup/test
360 # kill malloc task. 360 # kill malloc task.
361 361
362 Of course, tmpfs v.s. swapoff test should be tested, too. 362 Of course, tmpfs v.s. swapoff test should be tested, too.
363
364 9.8 OOM-Killer
365 Out-of-memory caused by memcg's limit will kill tasks under
366 the memcg. When hierarchy is used, a task under hierarchy
367 will be killed by the kernel.
368 In this case, panic_on_oom shouldn't be invoked and tasks
369 in other groups shouldn't be killed.
370
371 It's not difficult to cause OOM under memcg as following.
372 Case A) when you can swapoff
373 #swapoff -a
374 #echo 50M > /memory.limit_in_bytes
375 run 51M of malloc
376
377 Case B) when you use mem+swap limitation.
378 #echo 50M > memory.limit_in_bytes
379 #echo 50M > memory.memsw.limit_in_bytes
380 run 51M of malloc
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index e1501964df1e..23d1262c0775 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -6,15 +6,14 @@ used here with the memory controller that is used in hardware.
6 6
7Salient features 7Salient features
8 8
9a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages 9a. Enable control of Anonymous, Page Cache (mapped and unmapped) and
10 Swap Cache memory pages.
10b. The infrastructure allows easy addition of other types of memory to control 11b. The infrastructure allows easy addition of other types of memory to control
11c. Provides *zero overhead* for non memory controller users 12c. Provides *zero overhead* for non memory controller users
12d. Provides a double LRU: global memory pressure causes reclaim from the 13d. Provides a double LRU: global memory pressure causes reclaim from the
13 global LRU; a cgroup on hitting a limit, reclaims from the per 14 global LRU; a cgroup on hitting a limit, reclaims from the per
14 cgroup LRU 15 cgroup LRU
15 16
16NOTE: Swap Cache (unmapped) is not accounted now.
17
18Benefits and Purpose of the memory controller 17Benefits and Purpose of the memory controller
19 18
20The memory controller isolates the memory behaviour of a group of tasks 19The memory controller isolates the memory behaviour of a group of tasks
@@ -153,14 +152,19 @@ When swap is accounted, following files are added.
153 152
154usage of mem+swap is limited by memsw.limit_in_bytes. 153usage of mem+swap is limited by memsw.limit_in_bytes.
155 154
156Note: why 'mem+swap' rather than swap. 155* why 'mem+swap' rather than swap.
157The global LRU(kswapd) can swap out arbitrary pages. Swap-out means 156The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
158to move account from memory to swap...there is no change in usage of 157to move account from memory to swap...there is no change in usage of
159mem+swap. 158mem+swap. In other words, when we want to limit the usage of swap without
159affecting global LRU, mem+swap limit is better than just limiting swap from
160OS point of view.
160 161
161In other words, when we want to limit the usage of swap without affecting 162* What happens when a cgroup hits memory.memsw.limit_in_bytes
162global LRU, mem+swap limit is better than just limiting swap from OS point 163When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out
163of view. 164in this cgroup. Then, swap-out will not be done by cgroup routine and file
165caches are dropped. But as mentioned above, global LRU can do swapout memory
166from it for sanity of the system's memory management state. You can't forbid
167it by cgroup.
164 168
1652.5 Reclaim 1692.5 Reclaim
166 170
@@ -205,6 +209,7 @@ We can alter the memory limit:
205 209
206NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, 210NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
207mega or gigabytes. 211mega or gigabytes.
212NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
208 213
209# cat /cgroups/0/memory.limit_in_bytes 214# cat /cgroups/0/memory.limit_in_bytes
2104194304 2154194304
@@ -290,34 +295,44 @@ will be charged as a new owner of it.
290 moved to the parent. If you want to avoid that, force_empty will be useful. 295 moved to the parent. If you want to avoid that, force_empty will be useful.
291 296
2925.2 stat file 2975.2 stat file
293 memory.stat file includes following statistics (now) 298
294 cache - # of pages from page-cache and shmem. 299memory.stat file includes following statistics
295 rss - # of pages from anonymous memory. 300
296 pgpgin - # of event of charging 301cache - # of bytes of page cache memory.
297 pgpgout - # of event of uncharging 302rss - # of bytes of anonymous and swap cache memory.
298 active_anon - # of pages on active lru of anon, shmem. 303pgpgin - # of pages paged in (equivalent to # of charging events).
299 inactive_anon - # of pages on active lru of anon, shmem 304pgpgout - # of pages paged out (equivalent to # of uncharging events).
300 active_file - # of pages on active lru of file-cache 305active_anon - # of bytes of anonymous and swap cache memory on active
301 inactive_file - # of pages on inactive lru of file cache 306 lru list.
302 unevictable - # of pages cannot be reclaimed.(mlocked etc) 307inactive_anon - # of bytes of anonymous memory and swap cache memory on
303 308 inactive lru list.
304 Below is depend on CONFIG_DEBUG_VM. 309active_file - # of bytes of file-backed memory on active lru list.
305 inactive_ratio - VM inernal parameter. (see mm/page_alloc.c) 310inactive_file - # of bytes of file-backed memory on inactive lru list.
306 recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) 311unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
307 recent_rotated_file - VM internal parameter. (see mm/vmscan.c) 312
308 recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) 313The following additional stats are dependent on CONFIG_DEBUG_VM.
309 recent_scanned_file - VM internal parameter. (see mm/vmscan.c) 314
310 315inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
311 Memo: 316recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
317recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
318recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
319recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
320
321Memo:
312 recent_rotated means recent frequency of lru rotation. 322 recent_rotated means recent frequency of lru rotation.
313 recent_scanned means recent # of scans to lru. 323 recent_scanned means recent # of scans to lru.
314 showing for better debug please see the code for meanings. 324 showing for better debug please see the code for meanings.
315 325
326Note:
327 Only anonymous and swap cache memory is listed as part of 'rss' stat.
328 This should not be confused with the true 'resident set size' or the
329 amount of physical memory used by the cgroup. Per-cgroup rss
330 accounting is not done yet.
316 331
3175.3 swappiness 3325.3 swappiness
318 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. 333 Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
319 334
320 Following cgroup's swapiness can't be changed. 335 Following cgroups' swapiness can't be changed.
321 - root cgroup (uses /proc/sys/vm/swappiness). 336 - root cgroup (uses /proc/sys/vm/swappiness).
322 - a cgroup which uses hierarchy and it has child cgroup. 337 - a cgroup which uses hierarchy and it has child cgroup.
323 - a cgroup which uses hierarchy and not the root of hierarchy. 338 - a cgroup which uses hierarchy and not the root of hierarchy.
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index f196ac1d7d25..95b24d766eab 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -47,13 +47,18 @@ to work with it.
47 47
482. Basic accounting routines 482. Basic accounting routines
49 49
50 a. void res_counter_init(struct res_counter *rc) 50 a. void res_counter_init(struct res_counter *rc,
51 struct res_counter *rc_parent)
51 52
52 Initializes the resource counter. As usual, should be the first 53 Initializes the resource counter. As usual, should be the first
53 routine called for a new counter. 54 routine called for a new counter.
54 55
55 b. int res_counter_charge[_locked] 56 The struct res_counter *parent can be used to define a hierarchical
56 (struct res_counter *rc, unsigned long val) 57 child -> parent relationship directly in the res_counter structure,
58 NULL can be used to define no relationship.
59
60 c. int res_counter_charge(struct res_counter *rc, unsigned long val,
61 struct res_counter **limit_fail_at)
57 62
58 When a resource is about to be allocated it has to be accounted 63 When a resource is about to be allocated it has to be accounted
59 with the appropriate resource counter (controller should determine 64 with the appropriate resource counter (controller should determine
@@ -67,15 +72,25 @@ to work with it.
67 * if the charging is performed first, then it should be uncharged 72 * if the charging is performed first, then it should be uncharged
68 on error path (if the one is called). 73 on error path (if the one is called).
69 74
70 c. void res_counter_uncharge[_locked] 75 If the charging fails and a hierarchical dependency exists, the
76 limit_fail_at parameter is set to the particular res_counter element
77 where the charging failed.
78
79 d. int res_counter_charge_locked
80 (struct res_counter *rc, unsigned long val)
81
82 The same as res_counter_charge(), but it must not acquire/release the
83 res_counter->lock internally (it must be called with res_counter->lock
84 held).
85
86 e. void res_counter_uncharge[_locked]
71 (struct res_counter *rc, unsigned long val) 87 (struct res_counter *rc, unsigned long val)
72 88
73 When a resource is released (freed) it should be de-accounted 89 When a resource is released (freed) it should be de-accounted
74 from the resource counter it was accounted to. This is called 90 from the resource counter it was accounted to. This is called
75 "uncharging". 91 "uncharging".
76 92
77 The _locked routines imply that the res_counter->lock is taken. 93 The _locked routines imply that the res_counter->lock is taken.
78
79 94
80 2.1 Other accounting routines 95 2.1 Other accounting routines
81 96
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index 6977c178729a..f688eba87704 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -41,6 +41,12 @@ void cn_test_callback(void *data)
41 msg->seq, msg->ack, msg->len, (char *)msg->data); 41 msg->seq, msg->ack, msg->len, (char *)msg->data);
42} 42}
43 43
44/*
45 * Do not remove this function even if no one is using it as
46 * this is an example of how to get notifications about new
47 * connector user registration
48 */
49#if 0
44static int cn_test_want_notify(void) 50static int cn_test_want_notify(void)
45{ 51{
46 struct cn_ctl_msg *ctl; 52 struct cn_ctl_msg *ctl;
@@ -117,6 +123,7 @@ nlmsg_failure:
117 kfree_skb(skb); 123 kfree_skb(skb);
118 return -EINVAL; 124 return -EINVAL;
119} 125}
126#endif
120 127
121static u32 cn_test_timer_counter; 128static u32 cn_test_timer_counter;
122static void cn_test_timer_func(unsigned long __data) 129static void cn_test_timer_func(unsigned long __data)
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 43c743903dd7..75a58d14d3cf 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -155,7 +155,7 @@ actual frequency must be determined using the following rules:
155- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal 155- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal
156 target_freq. ("H for highest, but no higher than") 156 target_freq. ("H for highest, but no higher than")
157 157
158Here again the frequency table helper might assist you - see section 3 158Here again the frequency table helper might assist you - see section 2
159for details. 159for details.
160 160
161 161
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 5b0cfa67aff9..aed082f49d09 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -117,10 +117,30 @@ accessible parameters:
117sampling_rate: measured in uS (10^-6 seconds), this is how often you 117sampling_rate: measured in uS (10^-6 seconds), this is how often you
118want the kernel to look at the CPU usage and to make decisions on 118want the kernel to look at the CPU usage and to make decisions on
119what to do about the frequency. Typically this is set to values of 119what to do about the frequency. Typically this is set to values of
120around '10000' or more. 120around '10000' or more. It's default value is (cmp. with users-guide.txt):
121 121transition_latency * 1000
122show_sampling_rate_(min|max): the minimum and maximum sampling rates 122Be aware that transition latency is in ns and sampling_rate is in us, so you
123available that you may set 'sampling_rate' to. 123get the same sysfs value by default.
124Sampling rate should always get adjusted considering the transition latency
125To set the sampling rate 750 times as high as the transition latency
126in the bash (as said, 1000 is default), do:
127echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
128 >ondemand/sampling_rate
129
130show_sampling_rate_min:
131The sampling rate is limited by the HW transition latency:
132transition_latency * 100
133Or by kernel restrictions:
134If CONFIG_NO_HZ is set, the limit is 10ms fixed.
135If CONFIG_NO_HZ is not set or no_hz=off boot parameter is used, the
136limits depend on the CONFIG_HZ option:
137HZ=1000: min=20000us (20ms)
138HZ=250: min=80000us (80ms)
139HZ=100: min=200000us (200ms)
140The highest value of kernel and HW latency restrictions is shown and
141used as the minimum sampling rate.
142
143show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT.
124 144
125up_threshold: defines what the average CPU usage between the samplings 145up_threshold: defines what the average CPU usage between the samplings
126of 'sampling_rate' needs to be for the kernel to make a decision on 146of 'sampling_rate' needs to be for the kernel to make a decision on
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 917918f84fc7..5d5f5fadd1c2 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -31,7 +31,6 @@ Contents:
31 31
323. How to change the CPU cpufreq policy and/or speed 323. How to change the CPU cpufreq policy and/or speed
333.1 Preferred interface: sysfs 333.1 Preferred interface: sysfs
343.2 Deprecated interfaces
35 34
36 35
37 36
@@ -152,6 +151,18 @@ cpuinfo_min_freq : this file shows the minimum operating
152 frequency the processor can run at(in kHz) 151 frequency the processor can run at(in kHz)
153cpuinfo_max_freq : this file shows the maximum operating 152cpuinfo_max_freq : this file shows the maximum operating
154 frequency the processor can run at(in kHz) 153 frequency the processor can run at(in kHz)
154cpuinfo_transition_latency The time it takes on this CPU to
155 switch between two frequencies in nano
156 seconds. If unknown or known to be
157 that high that the driver does not
158 work with the ondemand governor, -1
159 (CPUFREQ_ETERNAL) will be returned.
160 Using this information can be useful
161 to choose an appropriate polling
162 frequency for a kernel governor or
163 userspace daemon. Make sure to not
164 switch the frequency too often
165 resulting in performance loss.
155scaling_driver : this file shows what cpufreq driver is 166scaling_driver : this file shows what cpufreq driver is
156 used to set the frequency on this CPU 167 used to set the frequency on this CPU
157 168
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 45932ec21cee..b41f3e58aefa 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -18,11 +18,11 @@ For an architecture to support this feature, it must define some of
18these macros in include/asm-XXX/topology.h: 18these macros in include/asm-XXX/topology.h:
19#define topology_physical_package_id(cpu) 19#define topology_physical_package_id(cpu)
20#define topology_core_id(cpu) 20#define topology_core_id(cpu)
21#define topology_thread_siblings(cpu) 21#define topology_thread_cpumask(cpu)
22#define topology_core_siblings(cpu) 22#define topology_core_cpumask(cpu)
23 23
24The type of **_id is int. 24The type of **_id is int.
25The type of siblings is cpumask_t. 25The type of siblings is (const) struct cpumask *.
26 26
27To be consistent on all architectures, include/linux/topology.h 27To be consistent on all architectures, include/linux/topology.h
28provides default definitions for any of the above macros that are 28provides default definitions for any of the above macros that are
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index c11b931f8f98..15174985ad08 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -76,9 +76,9 @@ Do the steps below to download the BIOS image.
76 76
77The /sys/class/firmware/dell_rbu/ entries will remain till the following is 77The /sys/class/firmware/dell_rbu/ entries will remain till the following is
78done. 78done.
79echo -1 > /sys/class/firmware/dell_rbu/loading. 79echo -1 > /sys/class/firmware/dell_rbu/loading
80Until this step is completed the driver cannot be unloaded. 80Until this step is completed the driver cannot be unloaded.
81Also echoing either mono ,packet or init in to image_type will free up the 81Also echoing either mono, packet or init in to image_type will free up the
82memory allocated by the driver. 82memory allocated by the driver.
83 83
84If a user by accident executes steps 1 and 3 above without executing step 2; 84If a user by accident executes steps 1 and 3 above without executing step 2;
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index dd48132a74dd..f622c1e9f0f9 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has been
119done. When done properly, though, it is time well spent. 119done. When done properly, though, it is time well spent.
120 120
121 121
1225.4: PATCH FORMATTING 1225.4: PATCH FORMATTING AND CHANGELOGS
123 123
124So now you have a perfect series of patches for posting, but the work is 124So now you have a perfect series of patches for posting, but the work is
125not done quite yet. Each patch needs to be formatted into a message which 125not done quite yet. Each patch needs to be formatted into a message which
@@ -146,8 +146,33 @@ that end, each patch will be composed of the following:
146 - One or more tag lines, with, at a minimum, one Signed-off-by: line from 146 - One or more tag lines, with, at a minimum, one Signed-off-by: line from
147 the author of the patch. Tags will be described in more detail below. 147 the author of the patch. Tags will be described in more detail below.
148 148
149The above three items should, normally, be the text used when committing 149The items above, together, form the changelog for the patch. Writing good
150the change to a revision control system. They are followed by: 150changelogs is a crucial but often-neglected art; it's worth spending
151another moment discussing this issue. When writing a changelog, you should
152bear in mind that a number of different people will be reading your words.
153These include subsystem maintainers and reviewers who need to decide
154whether the patch should be included, distributors and other maintainers
155trying to decide whether a patch should be backported to other kernels, bug
156hunters wondering whether the patch is responsible for a problem they are
157chasing, users who want to know how the kernel has changed, and more. A
158good changelog conveys the needed information to all of these people in the
159most direct and concise way possible.
160
161To that end, the summary line should describe the effects of and motivation
162for the change as well as possible given the one-line constraint. The
163detailed description can then amplify on those topics and provide any
164needed additional information. If the patch fixes a bug, cite the commit
165which introduced the bug if possible. If a problem is associated with
166specific log or compiler output, include that output to help others
167searching for a solution to the same problem. If the change is meant to
168support other changes coming in later patch, say so. If internal APIs are
169changed, detail those changes and how other developers should respond. In
170general, the more you can put yourself into the shoes of everybody who will
171be reading your changelog, the better that changelog (and the kernel as a
172whole) will be.
173
174Needless to say, the changelog should be the text used when committing the
175change to a revision control system. It will be followed by:
151 176
152 - The patch itself, in the unified ("-u") patch format. Using the "-p" 177 - The patch itself, in the unified ("-u") patch format. Using the "-p"
153 option to diff will associate function names with changes, making the 178 option to diff will associate function names with changes, making the
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 2be08240ee80..53d64d382343 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -1,9 +1,9 @@
1 1
2 LINUX ALLOCATED DEVICES (2.6+ version) 2 LINUX ALLOCATED DEVICES (2.6+ version)
3 3
4 Maintained by Torben Mathiasen <device@lanana.org> 4 Maintained by Alan Cox <device@lanana.org>
5 5
6 Last revised: 29 November 2006 6 Last revised: 6th April 2009
7 7
8This list is the Linux Device List, the official registry of allocated 8This list is the Linux Device List, the official registry of allocated
9device numbers and /dev directory nodes for the Linux operating 9device numbers and /dev directory nodes for the Linux operating
@@ -67,6 +67,11 @@ up to date. Due to the number of registrations I have to maintain it
67in "batch mode", so there is likely additional registrations that 67in "batch mode", so there is likely additional registrations that
68haven't been listed yet. 68haven't been listed yet.
69 69
70Fourth, remember that Linux now has extensive support for dynamic allocation
71of device numbering and can use sysfs and udev to handle the naming needs.
72There are still some exceptions in the serial and boot device area. Before
73asking for a device number make sure you actually need one.
74
70Finally, sometimes I have to play "namespace police." Please don't be 75Finally, sometimes I have to play "namespace police." Please don't be
71offended. I often get submissions for /dev names that would be bound 76offended. I often get submissions for /dev names that would be bound
72to cause conflicts down the road. I am trying to avoid getting in a 77to cause conflicts down the road. I am trying to avoid getting in a
@@ -101,7 +106,7 @@ Your cooperation is appreciated.
101 0 = /dev/ram0 First RAM disk 106 0 = /dev/ram0 First RAM disk
102 1 = /dev/ram1 Second RAM disk 107 1 = /dev/ram1 Second RAM disk
103 ... 108 ...
104 250 = /dev/initrd Initial RAM disk {2.6} 109 250 = /dev/initrd Initial RAM disk
105 110
106 Older kernels had /dev/ramdisk (1, 1) here. 111 Older kernels had /dev/ramdisk (1, 1) here.
107 /dev/initrd refers to a RAM disk which was preloaded 112 /dev/initrd refers to a RAM disk which was preloaded
@@ -340,7 +345,7 @@ Your cooperation is appreciated.
340 14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen 345 14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen
341 15 = /dev/touchscreen/mk712 MK712 touchscreen 346 15 = /dev/touchscreen/mk712 MK712 touchscreen
342 128 = /dev/beep Fancy beep device 347 128 = /dev/beep Fancy beep device
343 129 = /dev/modreq Kernel module load request {2.6} 348 129 =
344 130 = /dev/watchdog Watchdog timer port 349 130 = /dev/watchdog Watchdog timer port
345 131 = /dev/temperature Machine internal temperature 350 131 = /dev/temperature Machine internal temperature
346 132 = /dev/hwtrap Hardware fault trap 351 132 = /dev/hwtrap Hardware fault trap
@@ -350,10 +355,10 @@ Your cooperation is appreciated.
350 139 = /dev/openprom SPARC OpenBoot PROM 355 139 = /dev/openprom SPARC OpenBoot PROM
351 140 = /dev/relay8 Berkshire Products Octal relay card 356 140 = /dev/relay8 Berkshire Products Octal relay card
352 141 = /dev/relay16 Berkshire Products ISO-16 relay card 357 141 = /dev/relay16 Berkshire Products ISO-16 relay card
353 142 = /dev/msr x86 model-specific registers {2.6} 358 142 =
354 143 = /dev/pciconf PCI configuration space 359 143 = /dev/pciconf PCI configuration space
355 144 = /dev/nvram Non-volatile configuration RAM 360 144 = /dev/nvram Non-volatile configuration RAM
356 145 = /dev/hfmodem Soundcard shortwave modem control {2.6} 361 145 = /dev/hfmodem Soundcard shortwave modem control
357 146 = /dev/graphics Linux/SGI graphics device 362 146 = /dev/graphics Linux/SGI graphics device
358 147 = /dev/opengl Linux/SGI OpenGL pipe 363 147 = /dev/opengl Linux/SGI OpenGL pipe
359 148 = /dev/gfx Linux/SGI graphics effects device 364 148 = /dev/gfx Linux/SGI graphics effects device
@@ -435,6 +440,9 @@ Your cooperation is appreciated.
435 228 = /dev/hpet HPET driver 440 228 = /dev/hpet HPET driver
436 229 = /dev/fuse Fuse (virtual filesystem in user-space) 441 229 = /dev/fuse Fuse (virtual filesystem in user-space)
437 230 = /dev/midishare MidiShare driver 442 230 = /dev/midishare MidiShare driver
443 231 = /dev/snapshot System memory snapshot device
444 232 = /dev/kvm Kernel-based virtual machine (hardware virtualization extensions)
445 233 = /dev/kmview View-OS A process with a view
438 240-254 Reserved for local use 446 240-254 Reserved for local use
439 255 Reserved for MISC_DYNAMIC_MINOR 447 255 Reserved for MISC_DYNAMIC_MINOR
440 448
@@ -466,10 +474,7 @@ Your cooperation is appreciated.
466 The device names specified are proposed -- if there 474 The device names specified are proposed -- if there
467 are "standard" names for these devices, please let me know. 475 are "standard" names for these devices, please let me know.
468 476
469 12 block MSCDEX CD-ROM callback support {2.6} 477 12 block
470 0 = /dev/dos_cd0 First MSCDEX CD-ROM
471 1 = /dev/dos_cd1 Second MSCDEX CD-ROM
472 ...
473 478
474 13 char Input core 479 13 char Input core
475 0 = /dev/input/js0 First joystick 480 0 = /dev/input/js0 First joystick
@@ -498,7 +503,7 @@ Your cooperation is appreciated.
498 2 = /dev/midi00 First MIDI port 503 2 = /dev/midi00 First MIDI port
499 3 = /dev/dsp Digital audio 504 3 = /dev/dsp Digital audio
500 4 = /dev/audio Sun-compatible digital audio 505 4 = /dev/audio Sun-compatible digital audio
501 6 = /dev/sndstat Sound card status information {2.6} 506 6 =
502 7 = /dev/audioctl SPARC audio control device 507 7 = /dev/audioctl SPARC audio control device
503 8 = /dev/sequencer2 Sequencer -- alternate device 508 8 = /dev/sequencer2 Sequencer -- alternate device
504 16 = /dev/mixer1 Second soundcard mixer control 509 16 = /dev/mixer1 Second soundcard mixer control
@@ -510,14 +515,7 @@ Your cooperation is appreciated.
510 34 = /dev/midi02 Third MIDI port 515 34 = /dev/midi02 Third MIDI port
511 50 = /dev/midi03 Fourth MIDI port 516 50 = /dev/midi03 Fourth MIDI port
512 517
513 14 block BIOS harddrive callback support {2.6} 518 14 block
514 0 = /dev/dos_hda First BIOS harddrive whole disk
515 64 = /dev/dos_hdb Second BIOS harddrive whole disk
516 128 = /dev/dos_hdc Third BIOS harddrive whole disk
517 192 = /dev/dos_hdd Fourth BIOS harddrive whole disk
518
519 Partitions are handled in the same way as IDE disks
520 (see major number 3).
521 519
522 15 char Joystick 520 15 char Joystick
523 0 = /dev/js0 First analog joystick 521 0 = /dev/js0 First analog joystick
@@ -535,14 +533,14 @@ Your cooperation is appreciated.
535 16 block GoldStar CD-ROM 533 16 block GoldStar CD-ROM
536 0 = /dev/gscd GoldStar CD-ROM 534 0 = /dev/gscd GoldStar CD-ROM
537 535
538 17 char Chase serial card 536 17 char OBSOLETE (was Chase serial card)
539 0 = /dev/ttyH0 First Chase port 537 0 = /dev/ttyH0 First Chase port
540 1 = /dev/ttyH1 Second Chase port 538 1 = /dev/ttyH1 Second Chase port
541 ... 539 ...
542 17 block Optics Storage CD-ROM 540 17 block Optics Storage CD-ROM
543 0 = /dev/optcd Optics Storage CD-ROM 541 0 = /dev/optcd Optics Storage CD-ROM
544 542
545 18 char Chase serial card - alternate devices 543 18 char OBSOLETE (was Chase serial card - alternate devices)
546 0 = /dev/cuh0 Callout device for ttyH0 544 0 = /dev/cuh0 Callout device for ttyH0
547 1 = /dev/cuh1 Callout device for ttyH1 545 1 = /dev/cuh1 Callout device for ttyH1
548 ... 546 ...
@@ -644,8 +642,7 @@ Your cooperation is appreciated.
644 2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2 642 2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2
645 3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3 643 3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3
646 644
647 26 char Quanta WinVision frame grabber {2.6} 645 26 char
648 0 = /dev/wvisfgrab Quanta WinVision frame grabber
649 646
650 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM 647 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM
651 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0 648 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0
@@ -872,7 +869,7 @@ Your cooperation is appreciated.
872 and "user level packet I/O." This board is also 869 and "user level packet I/O." This board is also
873 accessible as a standard networking "eth" device. 870 accessible as a standard networking "eth" device.
874 871
875 38 block Reserved for Linux/AP+ 872 38 block OBSOLETE (was Linux/AP+)
876 873
877 39 char ML-16P experimental I/O board 874 39 char ML-16P experimental I/O board
878 0 = /dev/ml16pa-a0 First card, first analog channel 875 0 = /dev/ml16pa-a0 First card, first analog channel
@@ -892,29 +889,16 @@ Your cooperation is appreciated.
892 50 = /dev/ml16pb-c1 Second card, second counter/timer 889 50 = /dev/ml16pb-c1 Second card, second counter/timer
893 51 = /dev/ml16pb-c2 Second card, third counter/timer 890 51 = /dev/ml16pb-c2 Second card, third counter/timer
894 ... 891 ...
895 39 block Reserved for Linux/AP+ 892 39 block
896 893
897 40 char Matrox Meteor frame grabber {2.6} 894 40 char
898 0 = /dev/mmetfgrab Matrox Meteor frame grabber
899 895
900 40 block Syquest EZ135 parallel port removable drive 896 40 block
901 0 = /dev/eza Parallel EZ135 drive, whole disk
902
903 This device is obsolete and will be removed in a
904 future version of Linux. It has been replaced with
905 the parallel port IDE disk driver at major number 45.
906 Partitions are handled in the same way as IDE disks
907 (see major number 3).
908 897
909 41 char Yet Another Micro Monitor 898 41 char Yet Another Micro Monitor
910 0 = /dev/yamm Yet Another Micro Monitor 899 0 = /dev/yamm Yet Another Micro Monitor
911 900
912 41 block MicroSolutions BackPack parallel port CD-ROM 901 41 block
913 0 = /dev/bpcd BackPack CD-ROM
914
915 This device is obsolete and will be removed in a
916 future version of Linux. It has been replaced with
917 the parallel port ATAPI CD-ROM driver at major number 46.
918 902
919 42 char Demo/sample use 903 42 char Demo/sample use
920 904
@@ -1681,13 +1665,7 @@ Your cooperation is appreciated.
1681 disks (see major number 3) except that the limit on 1665 disks (see major number 3) except that the limit on
1682 partitions is 15. 1666 partitions is 15.
1683 1667
1684 93 char IBM Smart Capture Card frame grabber {2.6} 1668 93 char
1685 0 = /dev/iscc0 First Smart Capture Card
1686 1 = /dev/iscc1 Second Smart Capture Card
1687 ...
1688 128 = /dev/isccctl0 First Smart Capture Card control
1689 129 = /dev/isccctl1 Second Smart Capture Card control
1690 ...
1691 1669
1692 93 block NAND Flash Translation Layer filesystem 1670 93 block NAND Flash Translation Layer filesystem
1693 0 = /dev/nftla First NFTL layer 1671 0 = /dev/nftla First NFTL layer
@@ -1695,10 +1673,7 @@ Your cooperation is appreciated.
1695 ... 1673 ...
1696 240 = /dev/nftlp 16th NTFL layer 1674 240 = /dev/nftlp 16th NTFL layer
1697 1675
1698 94 char miroVIDEO DC10/30 capture/playback device {2.6} 1676 94 char
1699 0 = /dev/dcxx0 First capture card
1700 1 = /dev/dcxx1 Second capture card
1701 ...
1702 1677
1703 94 block IBM S/390 DASD block storage 1678 94 block IBM S/390 DASD block storage
1704 0 = /dev/dasda First DASD device, major 1679 0 = /dev/dasda First DASD device, major
@@ -1791,11 +1766,7 @@ Your cooperation is appreciated.
1791 ... 1766 ...
1792 15 = /dev/amiraid/ar?p15 15th partition 1767 15 = /dev/amiraid/ar?p15 15th partition
1793 1768
1794102 char Philips SAA5249 Teletext signal decoder {2.6} 1769102 char
1795 0 = /dev/tlk0 First Teletext decoder
1796 1 = /dev/tlk1 Second Teletext decoder
1797 2 = /dev/tlk2 Third Teletext decoder
1798 3 = /dev/tlk3 Fourth Teletext decoder
1799 1770
1800102 block Compressed block device 1771102 block Compressed block device
1801 0 = /dev/cbd/a First compressed block device, whole device 1772 0 = /dev/cbd/a First compressed block device, whole device
@@ -1916,10 +1887,7 @@ Your cooperation is appreciated.
1916 DAC960 (see major number 48) except that the limit on 1887 DAC960 (see major number 48) except that the limit on
1917 partitions is 15. 1888 partitions is 15.
1918 1889
1919111 char Philips SAA7146-based audio/video card {2.6} 1890111 char
1920 0 = /dev/av0 First A/V card
1921 1 = /dev/av1 Second A/V card
1922 ...
1923 1891
1924111 block Compaq Next Generation Drive Array, eighth controller 1892111 block Compaq Next Generation Drive Array, eighth controller
1925 0 = /dev/cciss/c7d0 First logical drive, whole disk 1893 0 = /dev/cciss/c7d0 First logical drive, whole disk
@@ -2079,8 +2047,8 @@ Your cooperation is appreciated.
2079 ... 2047 ...
2080 2048
2081119 char VMware virtual network control 2049119 char VMware virtual network control
2082 0 = /dev/vmnet0 1st virtual network 2050 0 = /dev/vnet0 1st virtual network
2083 1 = /dev/vmnet1 2nd virtual network 2051 1 = /dev/vnet1 2nd virtual network
2084 ... 2052 ...
2085 2053
2086120-127 char LOCAL/EXPERIMENTAL USE 2054120-127 char LOCAL/EXPERIMENTAL USE
@@ -2450,7 +2418,7 @@ Your cooperation is appreciated.
2450 2 = /dev/raw/raw2 Second raw I/O device 2418 2 = /dev/raw/raw2 Second raw I/O device
2451 ... 2419 ...
2452 2420
2453163 char UNASSIGNED (was Radio Tech BIM-XXX-RS232 radio modem - see 51) 2421163 char
2454 2422
2455164 char Chase Research AT/PCI-Fast serial card 2423164 char Chase Research AT/PCI-Fast serial card
2456 0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0 2424 0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0
@@ -2542,6 +2510,12 @@ Your cooperation is appreciated.
2542 1 = /dev/clanvi1 Second cLAN adapter 2510 1 = /dev/clanvi1 Second cLAN adapter
2543 ... 2511 ...
2544 2512
2513179 block MMC block devices
2514 0 = /dev/mmcblk0 First SD/MMC card
2515 1 = /dev/mmcblk0p1 First partition on first MMC card
2516 8 = /dev/mmcblk1 Second SD/MMC card
2517 ...
2518
2545179 char CCube DVXChip-based PCI products 2519179 char CCube DVXChip-based PCI products
2546 0 = /dev/dvxirq0 First DVX device 2520 0 = /dev/dvxirq0 First DVX device
2547 1 = /dev/dvxirq1 Second DVX device 2521 1 = /dev/dvxirq1 Second DVX device
@@ -2560,6 +2534,9 @@ Your cooperation is appreciated.
2560 96 = /dev/usb/hiddev0 1st USB HID device 2534 96 = /dev/usb/hiddev0 1st USB HID device
2561 ... 2535 ...
2562 111 = /dev/usb/hiddev15 16th USB HID device 2536 111 = /dev/usb/hiddev15 16th USB HID device
2537 112 = /dev/usb/auer0 1st auerswald ISDN device
2538 ...
2539 127 = /dev/usb/auer15 16th auerswald ISDN device
2563 128 = /dev/usb/brlvgr0 First Braille Voyager device 2540 128 = /dev/usb/brlvgr0 First Braille Voyager device
2564 ... 2541 ...
2565 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device 2542 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device
@@ -2810,6 +2787,20 @@ Your cooperation is appreciated.
2810 ... 2787 ...
2811 190 = /dev/ttyUL3 Xilinx uartlite - port 3 2788 190 = /dev/ttyUL3 Xilinx uartlite - port 3
2812 191 = /dev/xvc0 Xen virtual console - port 0 2789 191 = /dev/xvc0 Xen virtual console - port 0
2790 192 = /dev/ttyPZ0 pmac_zilog - port 0
2791 ...
2792 195 = /dev/ttyPZ3 pmac_zilog - port 3
2793 196 = /dev/ttyTX0 TX39/49 serial port 0
2794 ...
2795 204 = /dev/ttyTX7 TX39/49 serial port 7
2796 205 = /dev/ttySC0 SC26xx serial port 0
2797 206 = /dev/ttySC1 SC26xx serial port 1
2798 207 = /dev/ttySC2 SC26xx serial port 2
2799 208 = /dev/ttySC3 SC26xx serial port 3
2800 209 = /dev/ttyMAX0 MAX3100 serial port 0
2801 210 = /dev/ttyMAX1 MAX3100 serial port 1
2802 211 = /dev/ttyMAX2 MAX3100 serial port 2
2803 212 = /dev/ttyMAX3 MAX3100 serial port 3
2813 2804
2814205 char Low-density serial ports (alternate device) 2805205 char Low-density serial ports (alternate device)
2815 0 = /dev/culu0 Callout device for ttyLU0 2806 0 = /dev/culu0 Callout device for ttyLU0
@@ -3145,6 +3136,20 @@ Your cooperation is appreciated.
3145 1 = /dev/blockrom1 Second ROM card's translation layer interface 3136 1 = /dev/blockrom1 Second ROM card's translation layer interface
3146 ... 3137 ...
3147 3138
3139259 block Block Extended Major
3140 Used dynamically to hold additional partition minor
3141 numbers and allow large numbers of partitions per device
3142
3143259 char FPGA configuration interfaces
3144 0 = /dev/icap0 First Xilinx internal configuration
3145 1 = /dev/icap1 Second Xilinx internal configuration
3146
3147260 char OSD (Object-based-device) SCSI Device
3148 0 = /dev/osd0 First OSD Device
3149 1 = /dev/osd1 Second OSD Device
3150 ...
3151 255 = /dev/osd255 256th OSD Device
3152
3148 **** ADDITIONAL /dev DIRECTORY ENTRIES 3153 **** ADDITIONAL /dev DIRECTORY ENTRIES
3149 3154
3150This section details additional entries that should or may exist in 3155This section details additional entries that should or may exist in
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 1e89a51ea49b..88519daab6e9 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -62,7 +62,6 @@ aic7*reg_print.c*
62aic7*seq.h* 62aic7*seq.h*
63aicasm 63aicasm
64aicdb.h* 64aicdb.h*
65asm
66asm-offsets.h 65asm-offsets.h
67asm_offsets.h 66asm_offsets.h
68autoconf.h* 67autoconf.h*
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt
index a7cbfff40d07..a124f3126b0d 100644
--- a/Documentation/driver-model/device.txt
+++ b/Documentation/driver-model/device.txt
@@ -162,3 +162,35 @@ device_remove_file(dev,&dev_attr_power);
162 162
163The file name will be 'power' with a mode of 0644 (-rw-r--r--). 163The file name will be 'power' with a mode of 0644 (-rw-r--r--).
164 164
165Word of warning: While the kernel allows device_create_file() and
166device_remove_file() to be called on a device at any time, userspace has
167strict expectations on when attributes get created. When a new device is
168registered in the kernel, a uevent is generated to notify userspace (like
169udev) that a new device is available. If attributes are added after the
170device is registered, then userspace won't get notified and userspace will
171not know about the new attributes.
172
173This is important for device driver that need to publish additional
174attributes for a device at driver probe time. If the device driver simply
175calls device_create_file() on the device structure passed to it, then
176userspace will never be notified of the new attributes. Instead, it should
177probably use class_create() and class->dev_attrs to set up a list of
178desired attributes in the modules_init function, and then in the .probe()
179hook, and then use device_create() to create a new device as a child
180of the probed device. The new device will generate a new uevent and
181properly advertise the new attributes to userspace.
182
183For example, if a driver wanted to add the following attributes:
184struct device_attribute mydriver_attribs[] = {
185 __ATTR(port_count, 0444, port_count_show),
186 __ATTR(serial_number, 0444, serial_number_show),
187 NULL
188};
189
190Then in the module init function is would do:
191 mydriver_class = class_create(THIS_MODULE, "my_attrs");
192 mydriver_class.dev_attr = mydriver_attribs;
193
194And assuming 'dev' is the struct device passed into the probe hook, the driver
195probe function would do something like:
196 create_device(&mydriver_class, dev, chrdev, &private_data, "my_name");
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 387b8a720f4a..d79aead9418b 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -188,7 +188,7 @@ For example, you can do something like the following.
188 188
189 void my_midlayer_destroy_something() 189 void my_midlayer_destroy_something()
190 { 190 {
191 devres_release_group(dev, my_midlayer_create_soemthing); 191 devres_release_group(dev, my_midlayer_create_something);
192 } 192 }
193 193
194 194
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt
index 83009fdcbbc8..2e2c2ea90ceb 100644
--- a/Documentation/driver-model/platform.txt
+++ b/Documentation/driver-model/platform.txt
@@ -169,3 +169,62 @@ three different ways to find such a match:
169 be probed later if another device registers. (Which is OK, since 169 be probed later if another device registers. (Which is OK, since
170 this interface is only for use with non-hotpluggable devices.) 170 this interface is only for use with non-hotpluggable devices.)
171 171
172
173Early Platform Devices and Drivers
174~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
175The early platform interfaces provide platform data to platform device
176drivers early on during the system boot. The code is built on top of the
177early_param() command line parsing and can be executed very early on.
178
179Example: "earlyprintk" class early serial console in 6 steps
180
1811. Registering early platform device data
182~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183The architecture code registers platform device data using the function
184early_platform_add_devices(). In the case of early serial console this
185should be hardware configuration for the serial port. Devices registered
186at this point will later on be matched against early platform drivers.
187
1882. Parsing kernel command line
189~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190The architecture code calls parse_early_param() to parse the kernel
191command line. This will execute all matching early_param() callbacks.
192User specified early platform devices will be registered at this point.
193For the early serial console case the user can specify port on the
194kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
195the class string, "serial" is the name of the platfrom driver and
1960 is the platform device id. If the id is -1 then the dot and the
197id can be omitted.
198
1993. Installing early platform drivers belonging to a certain class
200~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
201The architecture code may optionally force registration of all early
202platform drivers belonging to a certain class using the function
203early_platform_driver_register_all(). User specified devices from
204step 2 have priority over these. This step is omitted by the serial
205driver example since the early serial driver code should be disabled
206unless the user has specified port on the kernel command line.
207
2084. Early platform driver registration
209~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
210Compiled-in platform drivers making use of early_platform_init() are
211automatically registered during step 2 or 3. The serial driver example
212should use early_platform_init("earlyprintk", &platform_driver).
213
2145. Probing of early platform drivers belonging to a certain class
215~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216The architecture code calls early_platform_driver_probe() to match
217registered early platform devices associated with a certain class with
218registered early platform drivers. Matched devices will get probed().
219This step can be executed at any point during the early boot. As soon
220as possible may be good for the serial port case.
221
2226. Inside the early platform driver probe()
223~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
224The driver code needs to take special care during early boot, especially
225when it comes to memory allocation and interrupt registration. The code
226in the probe() function can use is_early_platform_device() to check if
227it is called at early platform device or at the regular platform device
228time. The early serial driver performs register_console() at this point.
229
230For further information, see <linux/platform_device.h>.
diff --git a/Documentation/dvb/get_dvb_firmware b/Documentation/dvb/get_dvb_firmware
index f2e908d7f90d..a52adfc9a57f 100644
--- a/Documentation/dvb/get_dvb_firmware
+++ b/Documentation/dvb/get_dvb_firmware
@@ -25,7 +25,7 @@ use IO::Handle;
25 "tda10046lifeview", "av7110", "dec2000t", "dec2540t", 25 "tda10046lifeview", "av7110", "dec2000t", "dec2540t",
26 "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004", 26 "dec3000s", "vp7041", "dibusb", "nxt2002", "nxt2004",
27 "or51211", "or51132_qam", "or51132_vsb", "bluebird", 27 "or51211", "or51132_qam", "or51132_vsb", "bluebird",
28 "opera1"); 28 "opera1", "cx231xx", "cx18", "cx23885", "pvrusb2" );
29 29
30# Check args 30# Check args
31syntax() if (scalar(@ARGV) != 1); 31syntax() if (scalar(@ARGV) != 1);
@@ -37,8 +37,8 @@ for ($i=0; $i < scalar(@components); $i++) {
37 $outfile = eval($cid); 37 $outfile = eval($cid);
38 die $@ if $@; 38 die $@ if $@;
39 print STDERR <<EOF; 39 print STDERR <<EOF;
40Firmware $outfile extracted successfully. 40Firmware(s) $outfile extracted successfully.
41Now copy it to either /usr/lib/hotplug/firmware or /lib/firmware 41Now copy it(they) to either /usr/lib/hotplug/firmware or /lib/firmware
42(depending on configuration of firmware hotplug). 42(depending on configuration of firmware hotplug).
43EOF 43EOF
44 exit(0); 44 exit(0);
@@ -112,7 +112,7 @@ sub tda10045 {
112 112
113sub tda10046 { 113sub tda10046 {
114 my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip"; 114 my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip";
115 my $url = "http://technotrend-online.com/download/software/219/$sourcefile"; 115 my $url = "http://www.tt-download.com/download/updates/219/$sourcefile";
116 my $hash = "6a7e1e2f2644b162ff0502367553c72d"; 116 my $hash = "6a7e1e2f2644b162ff0502367553c72d";
117 my $outfile = "dvb-fe-tda10046.fw"; 117 my $outfile = "dvb-fe-tda10046.fw";
118 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); 118 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
@@ -129,8 +129,8 @@ sub tda10046 {
129} 129}
130 130
131sub tda10046lifeview { 131sub tda10046lifeview {
132 my $sourcefile = "Drv_2.11.02.zip"; 132 my $sourcefile = "7%5Cdrv_2.11.02.zip";
133 my $url = "http://www.lifeview.com.tw/drivers/pci_card/FlyDVB-T/$sourcefile"; 133 my $url = "http://www.lifeview.hk/dbimages/document/$sourcefile";
134 my $hash = "1ea24dee4eea8fe971686981f34fd2e0"; 134 my $hash = "1ea24dee4eea8fe971686981f34fd2e0";
135 my $outfile = "dvb-fe-tda10046.fw"; 135 my $outfile = "dvb-fe-tda10046.fw";
136 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); 136 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
@@ -317,7 +317,7 @@ sub nxt2002 {
317 317
318sub nxt2004 { 318sub nxt2004 {
319 my $sourcefile = "AVerTVHD_MCE_A180_Drv_v1.2.2.16.zip"; 319 my $sourcefile = "AVerTVHD_MCE_A180_Drv_v1.2.2.16.zip";
320 my $url = "http://www.aver.com/support/Drivers/$sourcefile"; 320 my $url = "http://www.avermedia-usa.com/support/Drivers/$sourcefile";
321 my $hash = "111cb885b1e009188346d72acfed024c"; 321 my $hash = "111cb885b1e009188346d72acfed024c";
322 my $outfile = "dvb-fe-nxt2004.fw"; 322 my $outfile = "dvb-fe-nxt2004.fw";
323 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); 323 my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
@@ -345,6 +345,85 @@ sub or51211 {
345 $fwfile; 345 $fwfile;
346} 346}
347 347
348sub cx231xx {
349 my $fwfile = "v4l-cx231xx-avcore-01.fw";
350 my $url = "http://linuxtv.org/downloads/firmware/$fwfile";
351 my $hash = "7d3bb956dc9df0eafded2b56ba57cc42";
352
353 checkstandard();
354
355 wgetfile($fwfile, $url);
356 verify($fwfile, $hash);
357
358 $fwfile;
359}
360
361sub cx18 {
362 my $url = "http://linuxtv.org/downloads/firmware/";
363
364 my %files = (
365 'v4l-cx23418-apu.fw' => '588f081b562f5c653a3db1ad8f65939a',
366 'v4l-cx23418-cpu.fw' => 'b6c7ed64bc44b1a6e0840adaeac39d79',
367 'v4l-cx23418-dig.fw' => '95bc688d3e7599fd5800161e9971cc55',
368 );
369
370 checkstandard();
371
372 my $allfiles;
373 foreach my $fwfile (keys %files) {
374 wgetfile($fwfile, "$url/$fwfile");
375 verify($fwfile, $files{$fwfile});
376 $allfiles .= " $fwfile";
377 }
378
379 $allfiles =~ s/^\s//;
380
381 $allfiles;
382}
383
384sub cx23885 {
385 my $url = "http://linuxtv.org/downloads/firmware/";
386
387 my %files = (
388 'v4l-cx23885-avcore-01.fw' => 'a9f8f5d901a7fb42f552e1ee6384f3bb',
389 'v4l-cx23885-enc.fw' => 'a9f8f5d901a7fb42f552e1ee6384f3bb',
390 );
391
392 checkstandard();
393
394 my $allfiles;
395 foreach my $fwfile (keys %files) {
396 wgetfile($fwfile, "$url/$fwfile");
397 verify($fwfile, $files{$fwfile});
398 $allfiles .= " $fwfile";
399 }
400
401 $allfiles =~ s/^\s//;
402
403 $allfiles;
404}
405
406sub pvrusb2 {
407 my $url = "http://linuxtv.org/downloads/firmware/";
408
409 my %files = (
410 'v4l-cx25840.fw' => 'dadb79e9904fc8af96e8111d9cb59320',
411 );
412
413 checkstandard();
414
415 my $allfiles;
416 foreach my $fwfile (keys %files) {
417 wgetfile($fwfile, "$url/$fwfile");
418 verify($fwfile, $files{$fwfile});
419 $allfiles .= " $fwfile";
420 }
421
422 $allfiles =~ s/^\s//;
423
424 $allfiles;
425}
426
348sub or51132_qam { 427sub or51132_qam {
349 my $fwfile = "dvb-fe-or51132-qam.fw"; 428 my $fwfile = "dvb-fe-or51132-qam.fw";
350 my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; 429 my $url = "http://linuxtv.org/downloads/firmware/$fwfile";
diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt
new file mode 100644
index 000000000000..674c5663d346
--- /dev/null
+++ b/Documentation/dynamic-debug-howto.txt
@@ -0,0 +1,240 @@
1
2Introduction
3============
4
5This document describes how to use the dynamic debug (ddebug) feature.
6
7Dynamic debug is designed to allow you to dynamically enable/disable kernel
8code to obtain additional kernel information. Currently, if
9CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be
10dynamically enabled per-callsite.
11
12Dynamic debug has even more useful features:
13
14 * Simple query language allows turning on and off debugging statements by
15 matching any combination of:
16
17 - source filename
18 - function name
19 - line number (including ranges of line numbers)
20 - module name
21 - format string
22
23 * Provides a debugfs control file: <debugfs>/dynamic_debug/control which can be
24 read to display the complete list of known debug statements, to help guide you
25
26Controlling dynamic debug Behaviour
27===============================
28
29The behaviour of pr_debug()/dev_debug()s are controlled via writing to a
30control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs
31filesystem, in order to make use of this feature. Subsequently, we refer to the
32control file as: <debugfs>/dynamic_debug/control. For example, if you want to
33enable printing from source file 'svcsock.c', line 1603 you simply do:
34
35nullarbor:~ # echo 'file svcsock.c line 1603 +p' >
36 <debugfs>/dynamic_debug/control
37
38If you make a mistake with the syntax, the write will fail thus:
39
40nullarbor:~ # echo 'file svcsock.c wtf 1 +p' >
41 <debugfs>/dynamic_debug/control
42-bash: echo: write error: Invalid argument
43
44Viewing Dynamic Debug Behaviour
45===========================
46
47You can view the currently configured behaviour of all the debug statements
48via:
49
50nullarbor:~ # cat <debugfs>/dynamic_debug/control
51# filename:lineno [module]function flags format
52/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA Module Removed, deregister RPC RDMA transport\012"
53/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline : %d\012"
54/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth : %d\012"
55/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests : %d\012"
56...
57
58
59You can also apply standard Unix text manipulation filters to this
60data, e.g.
61
62nullarbor:~ # grep -i rdma <debugfs>/dynamic_debug/control | wc -l
6362
64
65nullarbor:~ # grep -i tcp <debugfs>/dynamic_debug/control | wc -l
6642
67
68Note in particular that the third column shows the enabled behaviour
69flags for each debug statement callsite (see below for definitions of the
70flags). The default value, no extra behaviour enabled, is "-". So
71you can view all the debug statement callsites with any non-default flags:
72
73nullarbor:~ # awk '$3 != "-"' <debugfs>/dynamic_debug/control
74# filename:lineno [module]function flags format
75/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012"
76
77
78Command Language Reference
79==========================
80
81At the lexical level, a command comprises a sequence of words separated
82by whitespace characters. Note that newlines are treated as word
83separators and do *not* end a command or allow multiple commands to
84be done together. So these are all equivalent:
85
86nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' >
87 <debugfs>/dynamic_debug/control
88nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' >
89 <debugfs>/dynamic_debug/control
90nullarbor:~ # echo -c 'file svcsock.c\nline 1603 +p' >
91 <debugfs>/dynamic_debug/control
92nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
93 <debugfs>/dynamic_debug/control
94
95Commands are bounded by a write() system call. If you want to do
96multiple commands you need to do a separate "echo" for each, like:
97
98nullarbor:~ # echo 'file svcsock.c line 1603 +p' > /proc/dprintk ;\
99> echo 'file svcsock.c line 1563 +p' > /proc/dprintk
100
101or even like:
102
103nullarbor:~ # (
104> echo 'file svcsock.c line 1603 +p' ;\
105> echo 'file svcsock.c line 1563 +p' ;\
106> ) > /proc/dprintk
107
108At the syntactical level, a command comprises a sequence of match
109specifications, followed by a flags change specification.
110
111command ::= match-spec* flags-spec
112
113The match-spec's are used to choose a subset of the known dprintk()
114callsites to which to apply the flags-spec. Think of them as a query
115with implicit ANDs between each pair. Note that an empty list of
116match-specs is possible, but is not very useful because it will not
117match any debug statement callsites.
118
119A match specification comprises a keyword, which controls the attribute
120of the callsite to be compared, and a value to compare against. Possible
121keywords are:
122
123match-spec ::= 'func' string |
124 'file' string |
125 'module' string |
126 'format' string |
127 'line' line-range
128
129line-range ::= lineno |
130 '-'lineno |
131 lineno'-' |
132 lineno'-'lineno
133// Note: line-range cannot contain space, e.g.
134// "1-30" is valid range but "1 - 30" is not.
135
136lineno ::= unsigned-int
137
138The meanings of each keyword are:
139
140func
141 The given string is compared against the function name
142 of each callsite. Example:
143
144 func svc_tcp_accept
145
146file
147 The given string is compared against either the full
148 pathname or the basename of the source file of each
149 callsite. Examples:
150
151 file svcsock.c
152 file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c
153
154module
155 The given string is compared against the module name
156 of each callsite. The module name is the string as
157 seen in "lsmod", i.e. without the directory or the .ko
158 suffix and with '-' changed to '_'. Examples:
159
160 module sunrpc
161 module nfsd
162
163format
164 The given string is searched for in the dynamic debug format
165 string. Note that the string does not need to match the
166 entire format, only some part. Whitespace and other
167 special characters can be escaped using C octal character
168 escape \ooo notation, e.g. the space character is \040.
169 Alternatively, the string can be enclosed in double quote
170 characters (") or single quote characters (').
171 Examples:
172
173 format svcrdma: // many of the NFS/RDMA server dprintks
174 format readahead // some dprintks in the readahead cache
175 format nfsd:\040SETATTR // one way to match a format with whitespace
176 format "nfsd: SETATTR" // a neater way to match a format with whitespace
177 format 'nfsd: SETATTR' // yet another way to match a format with whitespace
178
179line
180 The given line number or range of line numbers is compared
181 against the line number of each dprintk() callsite. A single
182 line number matches the callsite line number exactly. A
183 range of line numbers matches any callsite between the first
184 and last line number inclusive. An empty first number means
185 the first line in the file, an empty line number means the
186 last number in the file. Examples:
187
188 line 1603 // exactly line 1603
189 line 1600-1605 // the six lines from line 1600 to line 1605
190 line -1605 // the 1605 lines from line 1 to line 1605
191 line 1600- // all lines from line 1600 to the end of the file
192
193The flags specification comprises a change operation followed
194by one or more flag characters. The change operation is one
195of the characters:
196
197-
198 remove the given flags
199
200+
201 add the given flags
202
203=
204 set the flags to the given flags
205
206The flags are:
207
208p
209 Causes a printk() message to be emitted to dmesg
210
211Note the regexp ^[-+=][scp]+$ matches a flags specification.
212Note also that there is no convenient syntax to remove all
213the flags at once, you need to use "-psc".
214
215Examples
216========
217
218// enable the message at line 1603 of file svcsock.c
219nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
220 <debugfs>/dynamic_debug/control
221
222// enable all the messages in file svcsock.c
223nullarbor:~ # echo -n 'file svcsock.c +p' >
224 <debugfs>/dynamic_debug/control
225
226// enable all the messages in the NFS server module
227nullarbor:~ # echo -n 'module nfsd +p' >
228 <debugfs>/dynamic_debug/control
229
230// enable all 12 messages in the function svc_process()
231nullarbor:~ # echo -n 'func svc_process +p' >
232 <debugfs>/dynamic_debug/control
233
234// disable all 12 messages in the function svc_process()
235nullarbor:~ # echo -n 'func svc_process -p' >
236 <debugfs>/dynamic_debug/control
237
238// enable messages for NFS calls READ, READLINK, READDIR and READDIR+.
239nullarbor:~ # echo -n 'format "nfsd: READ" +p' >
240 <debugfs>/dynamic_debug/control
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 8eda3fb66416..06f8f46692dc 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -23,8 +23,8 @@ first time, it was renamed to 'EDAC'.
23The bluesmoke project at sourceforge.net is now utilized as a 'staging area' 23The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
24for EDAC development, before it is sent upstream to kernel.org 24for EDAC development, before it is sent upstream to kernel.org
25 25
26At the bluesmoke/EDAC project site, is a series of quilt patches against 26At the bluesmoke/EDAC project site is a series of quilt patches against
27recent kernels, stored in a SVN respository. For easier downloading, there 27recent kernels, stored in a SVN repository. For easier downloading, there
28is also a tarball snapshot available. 28is also a tarball snapshot available.
29 29
30============================================================================ 30============================================================================
@@ -73,9 +73,9 @@ the vendor should tie the parity status bits to 0 if they do not intend
73to generate parity. Some vendors do not do this, and thus the parity bit 73to generate parity. Some vendors do not do this, and thus the parity bit
74can "float" giving false positives. 74can "float" giving false positives.
75 75
76In the kernel there is a pci device attribute located in sysfs that is 76In the kernel there is a PCI device attribute located in sysfs that is
77checked by the EDAC PCI scanning code. If that attribute is set, 77checked by the EDAC PCI scanning code. If that attribute is set,
78PCI parity/error scannining is skipped for that device. The attribute 78PCI parity/error scanning is skipped for that device. The attribute
79is: 79is:
80 80
81 broken_parity_status 81 broken_parity_status
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 4bc374a14345..079305640790 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -29,16 +29,16 @@ o debugfs entries
29fault-inject-debugfs kernel module provides some debugfs entries for runtime 29fault-inject-debugfs kernel module provides some debugfs entries for runtime
30configuration of fault-injection capabilities. 30configuration of fault-injection capabilities.
31 31
32- /debug/fail*/probability: 32- /sys/kernel/debug/fail*/probability:
33 33
34 likelihood of failure injection, in percent. 34 likelihood of failure injection, in percent.
35 Format: <percent> 35 Format: <percent>
36 36
37 Note that one-failure-per-hundred is a very high error rate 37 Note that one-failure-per-hundred is a very high error rate
38 for some testcases. Consider setting probability=100 and configure 38 for some testcases. Consider setting probability=100 and configure
39 /debug/fail*/interval for such testcases. 39 /sys/kernel/debug/fail*/interval for such testcases.
40 40
41- /debug/fail*/interval: 41- /sys/kernel/debug/fail*/interval:
42 42
43 specifies the interval between failures, for calls to 43 specifies the interval between failures, for calls to
44 should_fail() that pass all the other tests. 44 should_fail() that pass all the other tests.
@@ -46,18 +46,18 @@ configuration of fault-injection capabilities.
46 Note that if you enable this, by setting interval>1, you will 46 Note that if you enable this, by setting interval>1, you will
47 probably want to set probability=100. 47 probably want to set probability=100.
48 48
49- /debug/fail*/times: 49- /sys/kernel/debug/fail*/times:
50 50
51 specifies how many times failures may happen at most. 51 specifies how many times failures may happen at most.
52 A value of -1 means "no limit". 52 A value of -1 means "no limit".
53 53
54- /debug/fail*/space: 54- /sys/kernel/debug/fail*/space:
55 55
56 specifies an initial resource "budget", decremented by "size" 56 specifies an initial resource "budget", decremented by "size"
57 on each call to should_fail(,size). Failure injection is 57 on each call to should_fail(,size). Failure injection is
58 suppressed until "space" reaches zero. 58 suppressed until "space" reaches zero.
59 59
60- /debug/fail*/verbose 60- /sys/kernel/debug/fail*/verbose
61 61
62 Format: { 0 | 1 | 2 } 62 Format: { 0 | 1 | 2 }
63 specifies the verbosity of the messages when failure is 63 specifies the verbosity of the messages when failure is
@@ -65,17 +65,17 @@ configuration of fault-injection capabilities.
65 log line per failure; '2' will print a call trace too -- useful 65 log line per failure; '2' will print a call trace too -- useful
66 to debug the problems revealed by fault injection. 66 to debug the problems revealed by fault injection.
67 67
68- /debug/fail*/task-filter: 68- /sys/kernel/debug/fail*/task-filter:
69 69
70 Format: { 'Y' | 'N' } 70 Format: { 'Y' | 'N' }
71 A value of 'N' disables filtering by process (default). 71 A value of 'N' disables filtering by process (default).
72 Any positive value limits failures to only processes indicated by 72 Any positive value limits failures to only processes indicated by
73 /proc/<pid>/make-it-fail==1. 73 /proc/<pid>/make-it-fail==1.
74 74
75- /debug/fail*/require-start: 75- /sys/kernel/debug/fail*/require-start:
76- /debug/fail*/require-end: 76- /sys/kernel/debug/fail*/require-end:
77- /debug/fail*/reject-start: 77- /sys/kernel/debug/fail*/reject-start:
78- /debug/fail*/reject-end: 78- /sys/kernel/debug/fail*/reject-end:
79 79
80 specifies the range of virtual addresses tested during 80 specifies the range of virtual addresses tested during
81 stacktrace walking. Failure is injected only if some caller 81 stacktrace walking. Failure is injected only if some caller
@@ -84,26 +84,26 @@ configuration of fault-injection capabilities.
84 Default required range is [0,ULONG_MAX) (whole of virtual address space). 84 Default required range is [0,ULONG_MAX) (whole of virtual address space).
85 Default rejected range is [0,0). 85 Default rejected range is [0,0).
86 86
87- /debug/fail*/stacktrace-depth: 87- /sys/kernel/debug/fail*/stacktrace-depth:
88 88
89 specifies the maximum stacktrace depth walked during search 89 specifies the maximum stacktrace depth walked during search
90 for a caller within [require-start,require-end) OR 90 for a caller within [require-start,require-end) OR
91 [reject-start,reject-end). 91 [reject-start,reject-end).
92 92
93- /debug/fail_page_alloc/ignore-gfp-highmem: 93- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
94 94
95 Format: { 'Y' | 'N' } 95 Format: { 'Y' | 'N' }
96 default is 'N', setting it to 'Y' won't inject failures into 96 default is 'N', setting it to 'Y' won't inject failures into
97 highmem/user allocations. 97 highmem/user allocations.
98 98
99- /debug/failslab/ignore-gfp-wait: 99- /sys/kernel/debug/failslab/ignore-gfp-wait:
100- /debug/fail_page_alloc/ignore-gfp-wait: 100- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
101 101
102 Format: { 'Y' | 'N' } 102 Format: { 'Y' | 'N' }
103 default is 'N', setting it to 'Y' will inject failures 103 default is 'N', setting it to 'Y' will inject failures
104 only into non-sleep allocations (GFP_ATOMIC allocations). 104 only into non-sleep allocations (GFP_ATOMIC allocations).
105 105
106- /debug/fail_page_alloc/min-order: 106- /sys/kernel/debug/fail_page_alloc/min-order:
107 107
108 specifies the minimum page allocation order to be injected 108 specifies the minimum page allocation order to be injected
109 failures. 109 failures.
@@ -166,13 +166,13 @@ o Inject slab allocation failures into module init/exit code
166#!/bin/bash 166#!/bin/bash
167 167
168FAILTYPE=failslab 168FAILTYPE=failslab
169echo Y > /debug/$FAILTYPE/task-filter 169echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
170echo 10 > /debug/$FAILTYPE/probability 170echo 10 > /sys/kernel/debug/$FAILTYPE/probability
171echo 100 > /debug/$FAILTYPE/interval 171echo 100 > /sys/kernel/debug/$FAILTYPE/interval
172echo -1 > /debug/$FAILTYPE/times 172echo -1 > /sys/kernel/debug/$FAILTYPE/times
173echo 0 > /debug/$FAILTYPE/space 173echo 0 > /sys/kernel/debug/$FAILTYPE/space
174echo 2 > /debug/$FAILTYPE/verbose 174echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
175echo 1 > /debug/$FAILTYPE/ignore-gfp-wait 175echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
176 176
177faulty_system() 177faulty_system()
178{ 178{
@@ -217,20 +217,20 @@ then
217 exit 1 217 exit 1
218fi 218fi
219 219
220cat /sys/module/$module/sections/.text > /debug/$FAILTYPE/require-start 220cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start
221cat /sys/module/$module/sections/.data > /debug/$FAILTYPE/require-end 221cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end
222 222
223echo N > /debug/$FAILTYPE/task-filter 223echo N > /sys/kernel/debug/$FAILTYPE/task-filter
224echo 10 > /debug/$FAILTYPE/probability 224echo 10 > /sys/kernel/debug/$FAILTYPE/probability
225echo 100 > /debug/$FAILTYPE/interval 225echo 100 > /sys/kernel/debug/$FAILTYPE/interval
226echo -1 > /debug/$FAILTYPE/times 226echo -1 > /sys/kernel/debug/$FAILTYPE/times
227echo 0 > /debug/$FAILTYPE/space 227echo 0 > /sys/kernel/debug/$FAILTYPE/space
228echo 2 > /debug/$FAILTYPE/verbose 228echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
229echo 1 > /debug/$FAILTYPE/ignore-gfp-wait 229echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
230echo 1 > /debug/$FAILTYPE/ignore-gfp-highmem 230echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
231echo 10 > /debug/$FAILTYPE/stacktrace-depth 231echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
232 232
233trap "echo 0 > /debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT 233trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
234 234
235echo "Injecting errors into the module $module... (interrupt to stop)" 235echo "Injecting errors into the module $module... (interrupt to stop)"
236sleep 1000000 236sleep 1000000
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
index caabbd395e61..a618fd99c9f0 100644
--- a/Documentation/fb/00-INDEX
+++ b/Documentation/fb/00-INDEX
@@ -11,8 +11,6 @@ aty128fb.txt
11 - info on the ATI Rage128 frame buffer driver. 11 - info on the ATI Rage128 frame buffer driver.
12cirrusfb.txt 12cirrusfb.txt
13 - info on the driver for Cirrus Logic chipsets. 13 - info on the driver for Cirrus Logic chipsets.
14cyblafb/
15 - directory with documentation files related to the cyblafb driver.
16deferred_io.txt 14deferred_io.txt
17 - an introduction to deferred IO. 15 - an introduction to deferred IO.
18fbcon.txt 16fbcon.txt
diff --git a/Documentation/fb/cyblafb/bugs b/Documentation/fb/cyblafb/bugs
deleted file mode 100644
index 9443a6d72cdd..000000000000
--- a/Documentation/fb/cyblafb/bugs
+++ /dev/null
@@ -1,13 +0,0 @@
1Bugs
2====
3
4I currently don't know of any bug. Please do send reports to:
5 - linux-fbdev-devel@lists.sourceforge.net
6 - Knut_Petersen@t-online.de.
7
8
9Untested features
10=================
11
12All LCD stuff is untested. If it worked in tridentfb, it should work in
13cyblafb. Please test and report the results to Knut_Petersen@t-online.de.
diff --git a/Documentation/fb/cyblafb/credits b/Documentation/fb/cyblafb/credits
deleted file mode 100644
index 0eb3b443dc2b..000000000000
--- a/Documentation/fb/cyblafb/credits
+++ /dev/null
@@ -1,7 +0,0 @@
1Thanks to
2=========
3 * Alan Hourihane, for writing the X trident driver
4 * Jani Monoses, for writing the tridentfb driver
5 * Antonino A. Daplas, for review of the first published
6 version of cyblafb and some code
7 * Jochen Hein, for testing and a helpfull bug report
diff --git a/Documentation/fb/cyblafb/documentation b/Documentation/fb/cyblafb/documentation
deleted file mode 100644
index bb1aac048425..000000000000
--- a/Documentation/fb/cyblafb/documentation
+++ /dev/null
@@ -1,17 +0,0 @@
1Available Documentation
2=======================
3
4Apollo PLE 133 Chipset VT8601A North Bridge Datasheet, Rev. 1.82, October 22,
52001, available from VIA:
6
7 http://www.viavpsd.com/product/6/15/DS8601A182.pdf
8
9The datasheet is incomplete, some registers that need to be programmed are not
10explained at all and important bits are listed as "reserved". But you really
11need the datasheet to understand the code. "p. xxx" comments refer to page
12numbers of this document.
13
14XFree/XOrg drivers are available and of good quality, looking at the code
15there is a good idea if the datasheet does not provide enough information
16or if the datasheet seems to be wrong.
17
diff --git a/Documentation/fb/cyblafb/fb.modes b/Documentation/fb/cyblafb/fb.modes
deleted file mode 100644
index fe0e5223ba86..000000000000
--- a/Documentation/fb/cyblafb/fb.modes
+++ /dev/null
@@ -1,154 +0,0 @@
1#
2# Sample fb.modes file
3#
4# Provides an incomplete list of working modes for
5# the cyberblade/i1 graphics core.
6#
7# The value 4294967256 is used instead of -40. Of course, -40 is not
8# a really reasonable value, but chip design does not always follow
9# logic. Believe me, it's ok, and it's the way the BIOS does it.
10#
11# fbset requires 4294967256 in fb.modes and -40 as an argument to
12# the -t parameter. That's also not too reasonable, and it might change
13# in the future or might even be differt for your current version.
14#
15
16mode "640x480-50"
17 geometry 640 480 2048 4096 8
18 timings 47619 4294967256 24 17 0 216 3
19endmode
20
21mode "640x480-60"
22 geometry 640 480 2048 4096 8
23 timings 39682 4294967256 24 17 0 216 3
24endmode
25
26mode "640x480-70"
27 geometry 640 480 2048 4096 8
28 timings 34013 4294967256 24 17 0 216 3
29endmode
30
31mode "640x480-72"
32 geometry 640 480 2048 4096 8
33 timings 33068 4294967256 24 17 0 216 3
34endmode
35
36mode "640x480-75"
37 geometry 640 480 2048 4096 8
38 timings 31746 4294967256 24 17 0 216 3
39endmode
40
41mode "640x480-80"
42 geometry 640 480 2048 4096 8
43 timings 29761 4294967256 24 17 0 216 3
44endmode
45
46mode "640x480-85"
47 geometry 640 480 2048 4096 8
48 timings 28011 4294967256 24 17 0 216 3
49endmode
50
51mode "800x600-50"
52 geometry 800 600 2048 4096 8
53 timings 30303 96 24 14 0 136 11
54endmode
55
56mode "800x600-60"
57 geometry 800 600 2048 4096 8
58 timings 25252 96 24 14 0 136 11
59endmode
60
61mode "800x600-70"
62 geometry 800 600 2048 4096 8
63 timings 21645 96 24 14 0 136 11
64endmode
65
66mode "800x600-72"
67 geometry 800 600 2048 4096 8
68 timings 21043 96 24 14 0 136 11
69endmode
70
71mode "800x600-75"
72 geometry 800 600 2048 4096 8
73 timings 20202 96 24 14 0 136 11
74endmode
75
76mode "800x600-80"
77 geometry 800 600 2048 4096 8
78 timings 18939 96 24 14 0 136 11
79endmode
80
81mode "800x600-85"
82 geometry 800 600 2048 4096 8
83 timings 17825 96 24 14 0 136 11
84endmode
85
86mode "1024x768-50"
87 geometry 1024 768 2048 4096 8
88 timings 19054 144 24 29 0 120 3
89endmode
90
91mode "1024x768-60"
92 geometry 1024 768 2048 4096 8
93 timings 15880 144 24 29 0 120 3
94endmode
95
96mode "1024x768-70"
97 geometry 1024 768 2048 4096 8
98 timings 13610 144 24 29 0 120 3
99endmode
100
101mode "1024x768-72"
102 geometry 1024 768 2048 4096 8
103 timings 13232 144 24 29 0 120 3
104endmode
105
106mode "1024x768-75"
107 geometry 1024 768 2048 4096 8
108 timings 12703 144 24 29 0 120 3
109endmode
110
111mode "1024x768-80"
112 geometry 1024 768 2048 4096 8
113 timings 11910 144 24 29 0 120 3
114endmode
115
116mode "1024x768-85"
117 geometry 1024 768 2048 4096 8
118 timings 11209 144 24 29 0 120 3
119endmode
120
121mode "1280x1024-50"
122 geometry 1280 1024 2048 4096 8
123 timings 11114 232 16 39 0 160 3
124endmode
125
126mode "1280x1024-60"
127 geometry 1280 1024 2048 4096 8
128 timings 9262 232 16 39 0 160 3
129endmode
130
131mode "1280x1024-70"
132 geometry 1280 1024 2048 4096 8
133 timings 7939 232 16 39 0 160 3
134endmode
135
136mode "1280x1024-72"
137 geometry 1280 1024 2048 4096 8
138 timings 7719 232 16 39 0 160 3
139endmode
140
141mode "1280x1024-75"
142 geometry 1280 1024 2048 4096 8
143 timings 7410 232 16 39 0 160 3
144endmode
145
146mode "1280x1024-80"
147 geometry 1280 1024 2048 4096 8
148 timings 6946 232 16 39 0 160 3
149endmode
150
151mode "1280x1024-85"
152 geometry 1280 1024 2048 4096 8
153 timings 6538 232 16 39 0 160 3
154endmode
diff --git a/Documentation/fb/cyblafb/performance b/Documentation/fb/cyblafb/performance
deleted file mode 100644
index 8d15d5dfc6b3..000000000000
--- a/Documentation/fb/cyblafb/performance
+++ /dev/null
@@ -1,79 +0,0 @@
1Speed
2=====
3
4CyBlaFB is much faster than tridentfb and vesafb. Compare the performance data
5for mode 1280x1024-[8,16,32]@61 Hz.
6
7Test 1: Cat a file with 2000 lines of 0 characters.
8Test 2: Cat a file with 2000 lines of 80 characters.
9Test 3: Cat a file with 2000 lines of 160 characters.
10
11All values show system time use in seconds, kernel 2.6.12 was used for
12the measurements. 2.6.13 is a bit slower, 2.6.14 hopefully will include a
13patch that speeds up kernel bitblitting a lot ( > 20%).
14
15+-----------+-----------------------------------------------------+
16| | not accelerated |
17| TRIDENTFB +-----------------+-----------------+-----------------+
18| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
19| | noypan | ypan | noypan | ypan | noypan | ypan |
20+-----------+--------+--------+--------+--------+--------+--------+
21| Test 1 | 4.31 | 4.33 | 6.05 | 12.81 | ---- | ---- |
22| Test 2 | 67.94 | 5.44 | 123.16 | 14.79 | ---- | ---- |
23| Test 3 | 131.36 | 6.55 | 240.12 | 16.76 | ---- | ---- |
24+-----------+--------+--------+--------+--------+--------+--------+
25| Comments | | | completely bro- |
26| | | | ken, monitor |
27| | | | switches off |
28+-----------+-----------------+-----------------+-----------------+
29
30
31+-----------+-----------------------------------------------------+
32| | accelerated |
33| TRIDENTFB +-----------------+-----------------+-----------------+
34| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
35| | noypan | ypan | noypan | ypan | noypan | ypan |
36+-----------+--------+--------+--------+--------+--------+--------+
37| Test 1 | ---- | ---- | 20.62 | 1.22 | ---- | ---- |
38| Test 2 | ---- | ---- | 22.61 | 3.19 | ---- | ---- |
39| Test 3 | ---- | ---- | 24.59 | 5.16 | ---- | ---- |
40+-----------+--------+--------+--------+--------+--------+--------+
41| Comments | broken, writing | broken, ok only | completely bro- |
42| | to wrong places | if bgcolor is | ken, monitor |
43| | on screen + bug | black, bug in | switches off |
44| | in fillrect() | fillrect() | |
45+-----------+-----------------+-----------------+-----------------+
46
47
48+-----------+-----------------------------------------------------+
49| | not accelerated |
50| VESAFB +-----------------+-----------------+-----------------+
51| of 2.6.12 | 8 bpp | 16 bpp | 32 bpp |
52| | noypan | ypan | noypan | ypan | noypan | ypan |
53+-----------+--------+--------+--------+--------+--------+--------+
54| Test 1 | 4.26 | 3.76 | 5.99 | 7.23 | ---- | ---- |
55| Test 2 | 65.65 | 4.89 | 120.88 | 9.08 | ---- | ---- |
56| Test 3 | 126.91 | 5.94 | 235.77 | 11.03 | ---- | ---- |
57+-----------+--------+--------+--------+--------+--------+--------+
58| Comments | vga=0x307 | vga=0x31a | vga=0x31b not |
59| | fh=80kHz | fh=80kHz | supported by |
60| | fv=75kHz | fv=75kHz | video BIOS and |
61| | | | hardware |
62+-----------+-----------------+-----------------+-----------------+
63
64
65+-----------+-----------------------------------------------------+
66| | accelerated |
67| CYBLAFB +-----------------+-----------------+-----------------+
68| | 8 bpp | 16 bpp | 32 bpp |
69| | noypan | ypan | noypan | ypan | noypan | ypan |
70+-----------+--------+--------+--------+--------+--------+--------+
71| Test 1 | 8.02 | 0.23 | 19.04 | 0.61 | 57.12 | 2.74 |
72| Test 2 | 8.38 | 0.55 | 19.39 | 0.92 | 57.54 | 3.13 |
73| Test 3 | 8.73 | 0.86 | 19.74 | 1.24 | 57.95 | 3.51 |
74+-----------+--------+--------+--------+--------+--------+--------+
75| Comments | | | |
76| | | | |
77| | | | |
78| | | | |
79+-----------+-----------------+-----------------+-----------------+
diff --git a/Documentation/fb/cyblafb/todo b/Documentation/fb/cyblafb/todo
deleted file mode 100644
index c5f6d0eae545..000000000000
--- a/Documentation/fb/cyblafb/todo
+++ /dev/null
@@ -1,31 +0,0 @@
1TODO / Missing features
2=======================
3
4Verify LCD stuff "stretch" and "center" options are
5 completely untested ... this code needs to be
6 verified. As I don't have access to such
7 hardware, please contact me if you are
8 willing run some tests.
9
10Interlaced video modes The reason that interleaved
11 modes are disabled is that I do not know
12 the meaning of the vertical interlace
13 parameter. Also the datasheet mentions a
14 bit d8 of a horizontal interlace parameter,
15 but nowhere the lower 8 bits. Please help
16 if you can.
17
18low-res double scan modes Who needs it?
19
20accelerated color blitting Who needs it? The console driver does use color
21 blitting for nothing but drawing the penguine,
22 everything else is done using color expanding
23 blitting of 1bpp character bitmaps.
24
25ioctls Who needs it?
26
27TV-out Will be done later. Use "vga= " at boot time
28 to set a suitable video mode.
29
30??? Feel free to contact me if you have any
31 feature requests
diff --git a/Documentation/fb/cyblafb/usage b/Documentation/fb/cyblafb/usage
deleted file mode 100644
index a39bb3d402a2..000000000000
--- a/Documentation/fb/cyblafb/usage
+++ /dev/null
@@ -1,217 +0,0 @@
1CyBlaFB is a framebuffer driver for the Cyberblade/i1 graphics core integrated
2into the VIA Apollo PLE133 (aka vt8601) south bridge. It is developed and
3tested using a VIA EPIA 5000 board.
4
5Cyblafb - compiled into the kernel or as a module?
6==================================================
7
8You might compile cyblafb either as a module or compile it permanently into the
9kernel.
10
11Unless you have a real reason to do so you should not compile both vesafb and
12cyblafb permanently into the kernel. It's possible and it helps during the
13developement cycle, but it's useless and will at least block some otherwise
14usefull memory for ordinary users.
15
16Selecting Modes
17===============
18
19 Startup Mode
20 ============
21
22 First of all, you might use the "vga=???" boot parameter as it is
23 documented in vesafb.txt and svga.txt. Cyblafb will detect the video
24 mode selected and will use the geometry and timings found by
25 inspecting the hardware registers.
26
27 video=cyblafb vga=0x317
28
29 Alternatively you might use a combination of the mode, ref and bpp
30 parameters. If you compiled the driver into the kernel, add something
31 like this to the kernel command line:
32
33 video=cyblafb:1280x1024,bpp=16,ref=50 ...
34
35 If you compiled the driver as a module, the same mode would be
36 selected by the following command:
37
38 modprobe cyblafb mode=1280x1024 bpp=16 ref=50 ...
39
40 None of the modes possible to select as startup modes are affected by
41 the problems described at the end of the next subsection.
42
43 For all startup modes cyblafb chooses a virtual x resolution of 2048,
44 the only exception is mode 1280x1024 in combination with 32 bpp. This
45 allows ywrap scrolling for all those modes if rotation is 0 or 2, and
46 also fast scrolling if rotation is 1 or 3. The default virtual y reso-
47 lution is 4096 for bpp == 8, 2048 for bpp==16 and 1024 for bpp == 32,
48 again with the only exception of 1280x1024 at 32 bpp.
49
50 Please do set your video memory size to 8 Mb in the Bios setup. Other
51 values will work, but performace is decreased for a lot of modes.
52
53 Mode changes using fbset
54 ========================
55
56 You might use fbset to change the video mode, see "man fbset". Cyblafb
57 generally does assume that you know what you are doing. But it does
58 some checks, especially those that are needed to prevent you from
59 damaging your hardware.
60
61 - only 8, 16, 24 and 32 bpp video modes are accepted
62 - interlaced video modes are not accepted
63 - double scan video modes are not accepted
64 - if a flat panel is found, cyblafb does not allow you
65 to program a resolution higher than the physical
66 resolution of the flat panel monitor
67 - cyblafb does not allow vclk to exceed 230 MHz. As 32 bpp
68 and (currently) 24 bit modes use a doubled vclk internally,
69 the dotclock limit as seen by fbset is 115 MHz for those
70 modes and 230 MHz for 8 and 16 bpp modes.
71 - cyblafb will allow you to select very high resolutions as
72 long as the hardware can be programmed to these modes. The
73 documented limit 1600x1200 is not enforced, but don't expect
74 perfect signal quality.
75
76 Any request that violates the rules given above will be either changed
77 to something the hardware supports or an error value will be returned.
78
79 If you program a virtual y resolution higher than the hardware limit,
80 cyblafb will silently decrease that value to the highest possible
81 value. The same is true for a virtual x resolution that is not
82 supported by the hardware. Cyblafb tries to adapt vyres first because
83 vxres decides if ywrap scrolling is possible or not.
84
85 Attempts to disable acceleration are ignored, I believe that this is
86 safe.
87
88 Some video modes that should work do not work as expected. If you use
89 the standard fb.modes, fbset 640x480-60 will program that mode, but
90 you will see a vertical area, about two characters wide, with only
91 much darker characters than the other characters on the screen.
92 Cyblafb does allow that mode to be set, as it does not violate the
93 official specifications. It would need a lot of code to reliably sort
94 out all invalid modes, playing around with the margin values will
95 give a valid mode quickly. And if cyblafb would detect such an invalid
96 mode, should it silently alter the requested values or should it
97 report an error? Both options have some pros and cons. As stated
98 above, none of the startup modes are affected, and if you set
99 verbosity to 1 or higher, cyblafb will print the fbset command that
100 would be needed to program that mode using fbset.
101
102
103Other Parameters
104================
105
106
107crt don't autodetect, assume monitor connected to
108 standard VGA connector
109
110fp don't autodetect, assume flat panel display
111 connected to flat panel monitor interface
112
113nativex inform driver about native x resolution of
114 flat panel monitor connected to special
115 interface (should be autodetected)
116
117stretch stretch image to adapt low resolution modes to
118 higer resolutions of flat panel monitors
119 connected to special interface
120
121center center image to adapt low resolution modes to
122 higer resolutions of flat panel monitors
123 connected to special interface
124
125memsize use if autodetected memsize is wrong ...
126 should never be necessary
127
128nopcirr disable PCI read retry
129nopciwr disable PCI write retry
130nopcirb disable PCI read bursts
131nopciwb disable PCI write bursts
132
133bpp bpp for specified modes
134 valid values: 8 || 16 || 24 || 32
135
136ref refresh rate for specified mode
137 valid values: 50 <= ref <= 85
138
139mode 640x480 or 800x600 or 1024x768 or 1280x1024
140 if not specified, the startup mode will be detected
141 and used, so you might also use the vga=??? parameter
142 described in vesafb.txt. If you do not specify a mode,
143 bpp and ref parameters are ignored.
144
145verbosity 0 is the default, increase to at least 2 for every
146 bug report!
147
148Development hints
149=================
150
151It's much faster do compile a module and to load the new version after
152unloading the old module than to compile a new kernel and to reboot. So if you
153try to work on cyblafb, it might be a good idea to use cyblafb as a module.
154In real life, fast often means dangerous, and that's also the case here. If
155you introduce a serious bug when cyblafb is compiled into the kernel, the
156kernel will lock or oops with a high probability before the file system is
157mounted, and the danger for your data is low. If you load a broken own version
158of cyblafb on a running system, the danger for the integrity of the file
159system is much higher as you might need a hard reset afterwards. Decide
160yourself.
161
162Module unloading, the vfb method
163================================
164
165If you want to unload/reload cyblafb using the virtual framebuffer, you need
166to enable vfb support in the kernel first. After that, load the modules as
167shown below:
168
169 modprobe vfb vfb_enable=1
170 modprobe fbcon
171 modprobe cyblafb
172 fbset -fb /dev/fb1 1280x1024-60 -vyres 2662
173 con2fb /dev/fb1 /dev/tty1
174 ...
175
176If you now made some changes to cyblafb and want to reload it, you might do it
177as show below:
178
179 con2fb /dev/fb0 /dev/tty1
180 ...
181 rmmod cyblafb
182 modprobe cyblafb
183 con2fb /dev/fb1 /dev/tty1
184 ...
185
186Of course, you might choose another mode, and most certainly you also want to
187map some other /dev/tty* to the real framebuffer device. You might also choose
188to compile fbcon as a kernel module or place it permanently in the kernel.
189
190I do not know of any way to unload fbcon, and fbcon will prevent the
191framebuffer device loaded first from unloading. [If there is a way, then
192please add a description here!]
193
194Module unloading, the vesafb method
195===================================
196
197Configure the kernel:
198
199 <*> Support for frame buffer devices
200 [*] VESA VGA graphics support
201 <M> Cyberblade/i1 support
202
203Add e.g. "video=vesafb:ypan vga=0x307" to the kernel parameters. The ypan
204parameter is important, choose any vga parameter you like as long as it is
205a graphics mode.
206
207After booting, load cyblafb without any mode and bpp parameter and assign
208cyblafb to individual ttys using con2fb, e.g.:
209
210 modprobe cyblafb
211 con2fb /dev/fb1 /dev/tty1
212
213Unloading cyblafb works without problems after you assign vesafb to all
214ttys again, e.g.:
215
216 con2fb /dev/fb0 /dev/tty1
217 rmmod cyblafb
diff --git a/Documentation/fb/cyblafb/whatsnew b/Documentation/fb/cyblafb/whatsnew
deleted file mode 100644
index 76c07a26e044..000000000000
--- a/Documentation/fb/cyblafb/whatsnew
+++ /dev/null
@@ -1,29 +0,0 @@
10.62
2====
3
4 - the vesafb parameter has been removed as I decided to allow the
5 feature without any special parameter.
6
7 - Cyblafb does not use the vga style of panning any longer, now the
8 "right view" register in the graphics engine IO space is used. Without
9 that change it was impossible to use all available memory, and without
10 access to all available memory it is impossible to ywrap.
11
12 - The imageblit function now uses hardware acceleration for all font
13 widths. Hardware blitting across pixel column 2048 is broken in the
14 cyberblade/i1 graphics core, but we work around that hardware bug.
15
16 - modes with vxres != xres are supported now.
17
18 - ywrap scrolling is supported now and the default. This is a big
19 performance gain.
20
21 - default video modes use vyres > yres and vxres > xres to allow
22 almost optimal scrolling speed for normal and rotated screens
23
24 - some features mainly usefull for debugging the upper layers of the
25 framebuffer system have been added, have a look at the code
26
27 - fixed: Oops after unloading cyblafb when reading /proc/io*
28
29 - we work around some bugs of the higher framebuffer layers.
diff --git a/Documentation/fb/cyblafb/whycyblafb b/Documentation/fb/cyblafb/whycyblafb
deleted file mode 100644
index a123bc11e698..000000000000
--- a/Documentation/fb/cyblafb/whycyblafb
+++ /dev/null
@@ -1,85 +0,0 @@
1I tried the following framebuffer drivers:
2
3 - TRIDENTFB is full of bugs. Acceleration is broken for Blade3D
4 graphics cores like the cyberblade/i1. It claims to support a great
5 number of devices, but documentation for most of these devices is
6 unfortunately not available. There is _no_ reason to use tridentfb
7 for cyberblade/i1 + CRT users. VESAFB is faster, and the one
8 advantage, mode switching, is broken in tridentfb.
9
10 - VESAFB is used by many distributions as a standard. Vesafb does
11 not support mode switching. VESAFB is a bit faster than the working
12 configurations of TRIDENTFB, but it is still too slow, even if you
13 use ypan.
14
15 - EPIAFB (you'll find it on sourceforge) supports the Cyberblade/i1
16 graphics core, but it still has serious bugs and developement seems
17 to have stopped. This is the one driver with TV-out support. If you
18 do need this feature, try epiafb.
19
20None of these drivers was a real option for me.
21
22I believe that is unreasonable to change code that announces to support 20
23devices if I only have more or less sufficient documentation for exactly one
24of these. The risk of breaking device foo while fixing device bar is too high.
25
26So I decided to start CyBlaFB as a stripped down tridentfb.
27
28All code specific to other Trident chips has been removed. After that there
29were a lot of cosmetic changes to increase the readability of the code. All
30register names were changed to those mnemonics used in the datasheet. Function
31and macro names were changed if they hindered easy understanding of the code.
32
33After that I debugged the code and implemented some new features. I'll try to
34give a little summary of the main changes:
35
36 - calculation of vertical and horizontal timings was fixed
37
38 - video signal quality has been improved dramatically
39
40 - acceleration:
41
42 - fillrect and copyarea were fixed and reenabled
43
44 - color expanding imageblit was newly implemented, color
45 imageblit (only used to draw the penguine) still uses the
46 generic code.
47
48 - init of the acceleration engine was improved and moved to a
49 place where it really works ...
50
51 - sync function has a timeout now and tries to reset and
52 reinit the accel engine if necessary
53
54 - fewer slow copyarea calls when doing ypan scrolling by using
55 undocumented bit d21 of screen start address stored in
56 CR2B[5]. BIOS does use it also, so this should be safe.
57
58 - cyblafb rejects any attempt to set modes that would cause vclk
59 values above reasonable 230 MHz. 32bit modes use a clock
60 multiplicator of 2, so fbset does show the correct values for
61 pixclock but not for vclk in this case. The fbset limit is 115 MHz
62 for 32 bpp modes.
63
64 - cyblafb rejects modes known to be broken or unimplemented (all
65 interlaced modes, all doublescan modes for now)
66
67 - cyblafb now works independant of the video mode in effect at startup
68 time (tridentfb does not init all needed registers to reasonable
69 values)
70
71 - switching between video modes does work reliably now
72
73 - the first video mode now is the one selected on startup using the
74 vga=???? mechanism or any of
75 - 640x480, 800x600, 1024x768, 1280x1024
76 - 8, 16, 24 or 32 bpp
77 - refresh between 50 Hz and 85 Hz, 1 Hz steps (1280x1024-32
78 is limited to 63Hz)
79
80 - pci retry and pci burst mode are settable (try to disable if you
81 experience latency problems)
82
83 - built as a module cyblafb might be unloaded and reloaded using
84 the vfb module and con2vt or might be used together with vesafb
85
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
index c87bfe5c630a..b994c3b10549 100644
--- a/Documentation/fb/sh7760fb.txt
+++ b/Documentation/fb/sh7760fb.txt
@@ -1,7 +1,7 @@
1SH7760/SH7763 integrated LCDC Framebuffer driver 1SH7760/SH7763 integrated LCDC Framebuffer driver
2================================================ 2================================================
3 3
40. Overwiew 40. Overview
5----------- 5-----------
6The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which 6The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
7supports (in theory) resolutions ranging from 1x1 to 1024x1024, 7supports (in theory) resolutions ranging from 1x1 to 1024x1024,
diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt
index 7ac3c4078ff9..eefdd91d298a 100644
--- a/Documentation/fb/uvesafb.txt
+++ b/Documentation/fb/uvesafb.txt
@@ -59,7 +59,8 @@ Accepted options:
59ypan Enable display panning using the VESA protected mode 59ypan Enable display panning using the VESA protected mode
60 interface. The visible screen is just a window of the 60 interface. The visible screen is just a window of the
61 video memory, console scrolling is done by changing the 61 video memory, console scrolling is done by changing the
62 start of the window. Available on x86 only. 62 start of the window. This option is available on x86
63 only and is the default option on that architecture.
63 64
64ywrap Same as ypan, but assumes your gfx board can wrap-around 65ywrap Same as ypan, but assumes your gfx board can wrap-around
65 the video memory (i.e. starts reading from top if it 66 the video memory (i.e. starts reading from top if it
@@ -67,7 +68,7 @@ ywrap Same as ypan, but assumes your gfx board can wrap-around
67 Available on x86 only. 68 Available on x86 only.
68 69
69redraw Scroll by redrawing the affected part of the screen, this 70redraw Scroll by redrawing the affected part of the screen, this
70 is the safe (and slow) default. 71 is the default on non-x86.
71 72
72(If you're using uvesafb as a module, the above three options are 73(If you're using uvesafb as a module, the above three options are
73 used a parameter of the scroll option, e.g. scroll=ypan.) 74 used a parameter of the scroll option, e.g. scroll=ypan.)
@@ -182,7 +183,7 @@ from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
182 183
183-- 184--
184 Michal Januszewski <spock@gentoo.org> 185 Michal Januszewski <spock@gentoo.org>
185 Last updated: 2007-06-16 186 Last updated: 2009-03-30
186 187
187 Documentation of the uvesafb options is loosely based on vesafb.txt. 188 Documentation of the uvesafb options is loosely based on vesafb.txt.
188 189
diff --git a/Documentation/fb/vesafb.txt b/Documentation/fb/vesafb.txt
index ee277dd204b0..950d5a658cb3 100644
--- a/Documentation/fb/vesafb.txt
+++ b/Documentation/fb/vesafb.txt
@@ -95,7 +95,7 @@ There is no way to change the vesafb video mode and/or timings after
95booting linux. If you are not happy with the 60 Hz refresh rate, you 95booting linux. If you are not happy with the 60 Hz refresh rate, you
96have these options: 96have these options:
97 97
98 * configure and load the DOS-Tools for your the graphics board (if 98 * configure and load the DOS-Tools for the graphics board (if
99 available) and boot linux with loadlin. 99 available) and boot linux with loadlin.
100 * use a native driver (matroxfb/atyfb) instead if vesafb. If none 100 * use a native driver (matroxfb/atyfb) instead if vesafb. If none
101 is available, write a new one! 101 is available, write a new one!
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 20d3b94703a4..f8cd450be9aa 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -6,20 +6,61 @@ be removed from this file.
6 6
7--------------------------- 7---------------------------
8 8
9What: old static regulatory information and ieee80211_regdom module parameter 9What: IRQF_SAMPLE_RANDOM
10When: 2.6.29 10Check: IRQF_SAMPLE_RANDOM
11When: July 2009
12
13Why: Many of IRQF_SAMPLE_RANDOM users are technically bogus as entropy
14 sources in the kernel's current entropy model. To resolve this, every
15 input point to the kernel's entropy pool needs to better document the
16 type of entropy source it actually is. This will be replaced with
17 additional add_*_randomness functions in drivers/char/random.c
18
19Who: Robin Getz <rgetz@blackfin.uclinux.org> & Matt Mackall <mpm@selenic.com>
20
21---------------------------
22
23What: The ieee80211_regdom module parameter
24When: March 2010 / desktop catchup
25
26Why: This was inherited by the CONFIG_WIRELESS_OLD_REGULATORY code,
27 and currently serves as an option for users to define an
28 ISO / IEC 3166 alpha2 code for the country they are currently
29 present in. Although there are userspace API replacements for this
30 through nl80211 distributions haven't yet caught up with implementing
31 decent alternatives through standard GUIs. Although available as an
32 option through iw or wpa_supplicant its just a matter of time before
33 distributions pick up good GUI options for this. The ideal solution
34 would actually consist of intelligent designs which would do this for
35 the user automatically even when travelling through different countries.
36 Until then we leave this module parameter as a compromise.
37
38 When userspace improves with reasonable widely-available alternatives for
39 this we will no longer need this module parameter. This entry hopes that
40 by the super-futuristically looking date of "March 2010" we will have
41 such replacements widely available.
42
43Who: Luis R. Rodriguez <lrodriguez@atheros.com>
44
45---------------------------
46
47What: CONFIG_WIRELESS_OLD_REGULATORY - old static regulatory information
48When: March 2010 / desktop catchup
49
11Why: The old regulatory infrastructure has been replaced with a new one 50Why: The old regulatory infrastructure has been replaced with a new one
12 which does not require statically defined regulatory domains. We do 51 which does not require statically defined regulatory domains. We do
13 not want to keep static regulatory domains in the kernel due to the 52 not want to keep static regulatory domains in the kernel due to the
14 the dynamic nature of regulatory law and localization. We kept around 53 the dynamic nature of regulatory law and localization. We kept around
15 the old static definitions for the regulatory domains of: 54 the old static definitions for the regulatory domains of:
55
16 * US 56 * US
17 * JP 57 * JP
18 * EU 58 * EU
59
19 and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was 60 and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was
20 set. We also kept around the ieee80211_regdom module parameter in case 61 set. We will remove this option once the standard Linux desktop catches
21 some applications were relying on it. Changing regulatory domains 62 up with the new userspace APIs we have implemented.
22 can now be done instead by using nl80211, as is done with iw. 63
23Who: Luis R. Rodriguez <lrodriguez@atheros.com> 64Who: Luis R. Rodriguez <lrodriguez@atheros.com>
24 65
25--------------------------- 66---------------------------
@@ -37,10 +78,10 @@ Who: Pavel Machek <pavel@suse.cz>
37 78
38--------------------------- 79---------------------------
39 80
40What: Video4Linux API 1 ioctls and video_decoder.h from Video devices. 81What: Video4Linux API 1 ioctls and from Video devices.
41When: December 2008 82When: July 2009
42Files: include/linux/video_decoder.h include/linux/videodev.h 83Files: include/linux/videodev.h
43Check: include/linux/video_decoder.h include/linux/videodev.h 84Check: include/linux/videodev.h
44Why: V4L1 AP1 was replaced by V4L2 API during migration from 2.4 to 2.6 85Why: V4L1 AP1 was replaced by V4L2 API during migration from 2.4 to 2.6
45 series. The old API have lots of drawbacks and don't provide enough 86 series. The old API have lots of drawbacks and don't provide enough
46 means to work with all video and audio standards. The newer API is 87 means to work with all video and audio standards. The newer API is
@@ -228,8 +269,20 @@ Who: Jan Engelhardt <jengelh@computergmbh.de>
228 269
229--------------------------- 270---------------------------
230 271
272What: GPIO autorequest on gpio_direction_{input,output}() in gpiolib
273When: February 2010
274Why: All callers should use explicit gpio_request()/gpio_free().
275 The autorequest mechanism in gpiolib was provided mostly as a
276 migration aid for legacy GPIO interfaces (for SOC based GPIOs).
277 Those users have now largely migrated. Platforms implementing
278 the GPIO interfaces without using gpiolib will see no changes.
279Who: David Brownell <dbrownell@users.sourceforge.net>
280---------------------------
281
231What: b43 support for firmware revision < 410 282What: b43 support for firmware revision < 410
232When: July 2008 283When: The schedule was July 2008, but it was decided that we are going to keep the
284 code as long as there are no major maintanance headaches.
285 So it _could_ be removed _any_ time now, if it conflicts with something new.
233Why: The support code for the old firmware hurts code readability/maintainability 286Why: The support code for the old firmware hurts code readability/maintainability
234 and slightly hurts runtime performance. Bugfixes for the old firmware 287 and slightly hurts runtime performance. Bugfixes for the old firmware
235 are not provided by Broadcom anymore. 288 are not provided by Broadcom anymore.
@@ -244,13 +297,6 @@ Who: Glauber Costa <gcosta@redhat.com>
244 297
245--------------------------- 298---------------------------
246 299
247What: remove HID compat support
248When: 2.6.29
249Why: needed only as a temporary solution until distros fix themselves up
250Who: Jiri Slaby <jirislaby@gmail.com>
251
252---------------------------
253
254What: print_fn_descriptor_symbol() 300What: print_fn_descriptor_symbol()
255When: October 2009 301When: October 2009
256Why: The %pF vsprintf format provides the same functionality in a 302Why: The %pF vsprintf format provides the same functionality in a
@@ -282,6 +328,18 @@ Who: Vlad Yasevich <vladislav.yasevich@hp.com>
282 328
283--------------------------- 329---------------------------
284 330
331What: Ability for non root users to shm_get hugetlb pages based on mlock
332 resource limits
333When: 2.6.31
334Why: Non root users need to be part of /proc/sys/vm/hugetlb_shm_group or
335 have CAP_IPC_LOCK to be able to allocate shm segments backed by
336 huge pages. The mlock based rlimit check to allow shm hugetlb is
337 inconsistent with mmap based allocations. Hence it is being
338 deprecated.
339Who: Ravikiran Thirumalai <kiran@scalex86.org>
340
341---------------------------
342
285What: CONFIG_THERMAL_HWMON 343What: CONFIG_THERMAL_HWMON
286When: January 2009 344When: January 2009
287Why: This option was introduced just to allow older lm-sensors userspace 345Why: This option was introduced just to allow older lm-sensors userspace
@@ -310,14 +368,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl>
310 368
311--------------------------- 369---------------------------
312 370
313What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
314When: 2.6.29 (ideally) or 2.6.30 (more likely)
315Why: Deprecated by the new (standard) device driver binding model. Use
316 i2c_driver->probe() and ->remove() instead.
317Who: Jean Delvare <khali@linux-fr.org>
318
319---------------------------
320
321What: fscher and fscpos drivers 371What: fscher and fscpos drivers
322When: June 2009 372When: June 2009
323Why: Deprecated by the new fschmd driver. 373Why: Deprecated by the new fschmd driver.
@@ -326,17 +376,6 @@ Who: Hans de Goede <hdegoede@redhat.com>
326 376
327--------------------------- 377---------------------------
328 378
329What: SELinux "compat_net" functionality
330When: 2.6.30 at the earliest
331Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net"
332 network access control functionality of SELinux. Secmark offers both
333 better performance and greater flexibility than the "compat_net"
334 mechanism. Now that the major Linux distributions have moved to
335 Secmark, it is time to deprecate the older mechanism and start the
336 process of removing the old code.
337Who: Paul Moore <paul.moore@hp.com>
338---------------------------
339
340What: sysfs ui for changing p4-clockmod parameters 379What: sysfs ui for changing p4-clockmod parameters
341When: September 2009 380When: September 2009
342Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and 381Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
@@ -344,3 +383,78 @@ Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
344 Removal is subject to fixing any remaining bugs in ACPI which may 383 Removal is subject to fixing any remaining bugs in ACPI which may
345 cause the thermal throttling not to happen at the right time. 384 cause the thermal throttling not to happen at the right time.
346Who: Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com> 385Who: Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com>
386
387-----------------------------
388
389What: __do_IRQ all in one fits nothing interrupt handler
390When: 2.6.32
391Why: __do_IRQ was kept for easy migration to the type flow handlers.
392 More than two years of migration time is enough.
393Who: Thomas Gleixner <tglx@linutronix.de>
394
395-----------------------------
396
397What: obsolete generic irq defines and typedefs
398When: 2.6.30
399Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
400 have been kept around for migration reasons. After more than two years
401 it's time to remove them finally
402Who: Thomas Gleixner <tglx@linutronix.de>
403
404---------------------------
405
406What: fakephp and associated sysfs files in /sys/bus/pci/slots/
407When: 2011
408Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to
409 represent a machine's physical PCI slots. The change in semantics
410 had userspace implications, as the hotplug core no longer allowed
411 drivers to create multiple sysfs files per physical slot (required
412 for multi-function devices, e.g.). fakephp was seen as a developer's
413 tool only, and its interface changed. Too late, we learned that
414 there were some users of the fakephp interface.
415
416 In 2.6.30, the original fakephp interface was restored. At the same
417 time, the PCI core gained the ability that fakephp provided, namely
418 function-level hot-remove and hot-add.
419
420 Since the PCI core now provides the same functionality, exposed in:
421
422 /sys/bus/pci/rescan
423 /sys/bus/pci/devices/.../remove
424 /sys/bus/pci/devices/.../rescan
425
426 there is no functional reason to maintain fakephp as well.
427
428 We will keep the existing module so that 'modprobe fakephp' will
429 present the old /sys/bus/pci/slots/... interface for compatibility,
430 but users are urged to migrate their applications to the API above.
431
432 After a reasonable transition period, we will remove the legacy
433 fakephp interface.
434Who: Alex Chiang <achiang@hp.com>
435
436---------------------------
437
438What: i2c-voodoo3 driver
439When: October 2009
440Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate
441 driver but this caused driver conflicts.
442Who: Jean Delvare <khali@linux-fr.org>
443 Krzysztof Helt <krzysztof.h1@wp.pl>
444
445---------------------------
446
447What: CONFIG_RFKILL_INPUT
448When: 2.6.33
449Why: Should be implemented in userspace, policy daemon.
450Who: Johannes Berg <johannes@sipsolutions.net>
451
452----------------------------
453
454What: CONFIG_X86_OLD_MCE
455When: 2.6.32
456Why: Remove the old legacy 32bit machine check code. This has been
457 superseded by the newer machine check code from the 64bit port,
458 but the old version has been kept around for easier testing. Note this
459 doesn't impact the old P5 and WinChip machine check handlers.
460Who: Andi Kleen <andi@firstfloor.org>
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 52cd611277a3..f15621ee5599 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -66,8 +66,14 @@ mandatory-locking.txt
66 - info on the Linux implementation of Sys V mandatory file locking. 66 - info on the Linux implementation of Sys V mandatory file locking.
67ncpfs.txt 67ncpfs.txt
68 - info on Novell Netware(tm) filesystem using NCP protocol. 68 - info on Novell Netware(tm) filesystem using NCP protocol.
69nfs41-server.txt
70 - info on the Linux server implementation of NFSv4 minor version 1.
71nfs-rdma.txt
72 - how to install and setup the Linux NFS/RDMA client and server software.
69nfsroot.txt 73nfsroot.txt
70 - short guide on setting up a diskless box with NFS root filesystem. 74 - short guide on setting up a diskless box with NFS root filesystem.
75nilfs2.txt
76 - info and mount options for the NILFS2 filesystem.
71ntfs.txt 77ntfs.txt
72 - info and mount options for the NTFS filesystem (Windows NT). 78 - info and mount options for the NTFS filesystem (Windows NT).
73ocfs2.txt 79ocfs2.txt
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ec6a9392a173..229d7b7c50a3 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -187,7 +187,7 @@ readpages: no
187write_begin: no locks the page yes 187write_begin: no locks the page yes
188write_end: no yes, unlocks yes 188write_end: no yes, unlocks yes
189perform_write: no n/a yes 189perform_write: no n/a yes
190bmap: yes 190bmap: no
191invalidatepage: no yes 191invalidatepage: no yes
192releasepage: no yes 192releasepage: no yes
193direct_IO: no 193direct_IO: no
@@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that
437can and should be done using the internal locking with smaller critical areas). 437can and should be done using the internal locking with smaller critical areas).
438Current worst offender is ext2_get_block()... 438Current worst offender is ext2_get_block()...
439 439
440->fasync() is a mess. This area needs a big cleanup and that will probably 440->fasync() is called without BKL protection, and is responsible for
441affect locking. 441maintaining the FASYNC bit in filp->f_flags. Most instances call
442fasync_helper(), which does that maintenance, so it's not normally
443something one needs to worry about. Return values > 0 will be mapped to
444zero in the VFS layer.
442 445
443->readdir() and ->ioctl() on directories must be changed. Ideally we would 446->readdir() and ->ioctl() on directories must be changed. Ideally we would
444move ->readdir() to inode_operations and use a separate method for directory 447move ->readdir() to inode_operations and use a separate method for directory
@@ -502,23 +505,31 @@ prototypes:
502 void (*open)(struct vm_area_struct*); 505 void (*open)(struct vm_area_struct*);
503 void (*close)(struct vm_area_struct*); 506 void (*close)(struct vm_area_struct*);
504 int (*fault)(struct vm_area_struct*, struct vm_fault *); 507 int (*fault)(struct vm_area_struct*, struct vm_fault *);
505 int (*page_mkwrite)(struct vm_area_struct *, struct page *); 508 int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
506 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); 509 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
507 510
508locking rules: 511locking rules:
509 BKL mmap_sem PageLocked(page) 512 BKL mmap_sem PageLocked(page)
510open: no yes 513open: no yes
511close: no yes 514close: no yes
512fault: no yes 515fault: no yes can return with page locked
513page_mkwrite: no yes no 516page_mkwrite: no yes can return with page locked
514access: no yes 517access: no yes
515 518
516 ->page_mkwrite() is called when a previously read-only page is 519 ->fault() is called when a previously not present pte is about
517about to become writeable. The file system is responsible for 520to be faulted in. The filesystem must find and return the page associated
518protecting against truncate races. Once appropriate action has been 521with the passed in "pgoff" in the vm_fault structure. If it is possible that
519taking to lock out truncate, the page range should be verified to be 522the page may be truncated and/or invalidated, then the filesystem must lock
520within i_size. The page mapping should also be checked that it is not 523the page, then ensure it is not already truncated (the page lock will block
521NULL. 524subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
525locked. The VM will unlock the page.
526
527 ->page_mkwrite() is called when a previously read-only pte is
528about to become writeable. The filesystem again must ensure that there are
529no truncate/invalidate races, and then return with the page locked. If
530the page has been truncated, the filesystem should not look up a new page
531like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
532will cause the VM to retry the fault.
522 533
523 ->access() is called when get_user_pages() fails in 534 ->access() is called when get_user_pages() fails in
524acces_process_vm(), typically used to debug a process through 535acces_process_vm(), typically used to debug a process through
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
index c6341745df37..8f78ded4b648 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -369,7 +369,7 @@ The call requires an initialized struct autofs_dev_ioctl. There are two
369possible variations. Both use the path field set to the path of the mount 369possible variations. Both use the path field set to the path of the mount
370point to check and the size field adjusted appropriately. One uses the 370point to check and the size field adjusted appropriately. One uses the
371ioctlfd field to identify a specific mount point to check while the other 371ioctlfd field to identify a specific mount point to check while the other
372variation uses the path and optionaly arg1 set to an autofs mount type. 372variation uses the path and optionally arg1 set to an autofs mount type.
373The call returns 1 if this is a mount point and sets arg1 to the device 373The call returns 1 if this is a mount point and sets arg1 to the device
374number of the mount and field arg2 to the relevant super block magic 374number of the mount and field arg2 to the relevant super block magic
375number (described below) or 0 if it isn't a mountpoint. In both cases 375number (described below) or 0 if it isn't a mountpoint. In both cases
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
new file mode 100644
index 000000000000..382d52cdaf2d
--- /dev/null
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -0,0 +1,658 @@
1 ==========================
2 FS-CACHE CACHE BACKEND API
3 ==========================
4
5The FS-Cache system provides an API by which actual caches can be supplied to
6FS-Cache for it to then serve out to network filesystems and other interested
7parties.
8
9This API is declared in <linux/fscache-cache.h>.
10
11
12====================================
13INITIALISING AND REGISTERING A CACHE
14====================================
15
16To start off, a cache definition must be initialised and registered for each
17cache the backend wants to make available. For instance, CacheFS does this in
18the fill_super() operation on mounting.
19
20The cache definition (struct fscache_cache) should be initialised by calling:
21
22 void fscache_init_cache(struct fscache_cache *cache,
23 struct fscache_cache_ops *ops,
24 const char *idfmt,
25 ...);
26
27Where:
28
29 (*) "cache" is a pointer to the cache definition;
30
31 (*) "ops" is a pointer to the table of operations that the backend supports on
32 this cache; and
33
34 (*) "idfmt" is a format and printf-style arguments for constructing a label
35 for the cache.
36
37
38The cache should then be registered with FS-Cache by passing a pointer to the
39previously initialised cache definition to:
40
41 int fscache_add_cache(struct fscache_cache *cache,
42 struct fscache_object *fsdef,
43 const char *tagname);
44
45Two extra arguments should also be supplied:
46
47 (*) "fsdef" which should point to the object representation for the FS-Cache
48 master index in this cache. Netfs primary index entries will be created
49 here. FS-Cache keeps the caller's reference to the index object if
50 successful and will release it upon withdrawal of the cache.
51
52 (*) "tagname" which, if given, should be a text string naming this cache. If
53 this is NULL, the identifier will be used instead. For CacheFS, the
54 identifier is set to name the underlying block device and the tag can be
55 supplied by mount.
56
57This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag
58is already in use. 0 will be returned on success.
59
60
61=====================
62UNREGISTERING A CACHE
63=====================
64
65A cache can be withdrawn from the system by calling this function with a
66pointer to the cache definition:
67
68 void fscache_withdraw_cache(struct fscache_cache *cache);
69
70In CacheFS's case, this is called by put_super().
71
72
73========
74SECURITY
75========
76
77The cache methods are executed one of two contexts:
78
79 (1) that of the userspace process that issued the netfs operation that caused
80 the cache method to be invoked, or
81
82 (2) that of one of the processes in the FS-Cache thread pool.
83
84In either case, this may not be an appropriate context in which to access the
85cache.
86
87The calling process's fsuid, fsgid and SELinux security identities may need to
88be masqueraded for the duration of the cache driver's access to the cache.
89This is left to the cache to handle; FS-Cache makes no effort in this regard.
90
91
92===================================
93CONTROL AND STATISTICS PRESENTATION
94===================================
95
96The cache may present data to the outside world through FS-Cache's interfaces
97in sysfs and procfs - the former for control and the latter for statistics.
98
99A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS
100is enabled. This is accessible through the kobject struct fscache_cache::kobj
101and is for use by the cache as it sees fit.
102
103
104========================
105RELEVANT DATA STRUCTURES
106========================
107
108 (*) Index/Data file FS-Cache representation cookie:
109
110 struct fscache_cookie {
111 struct fscache_object_def *def;
112 struct fscache_netfs *netfs;
113 void *netfs_data;
114 ...
115 };
116
117 The fields that might be of use to the backend describe the object
118 definition, the netfs definition and the netfs's data for this cookie.
119 The object definition contain functions supplied by the netfs for loading
120 and matching index entries; these are required to provide some of the
121 cache operations.
122
123
124 (*) In-cache object representation:
125
126 struct fscache_object {
127 int debug_id;
128 enum {
129 FSCACHE_OBJECT_RECYCLING,
130 ...
131 } state;
132 spinlock_t lock
133 struct fscache_cache *cache;
134 struct fscache_cookie *cookie;
135 ...
136 };
137
138 Structures of this type should be allocated by the cache backend and
139 passed to FS-Cache when requested by the appropriate cache operation. In
140 the case of CacheFS, they're embedded in CacheFS's internal object
141 structures.
142
143 The debug_id is a simple integer that can be used in debugging messages
144 that refer to a particular object. In such a case it should be printed
145 using "OBJ%x" to be consistent with FS-Cache.
146
147 Each object contains a pointer to the cookie that represents the object it
148 is backing. An object should retired when put_object() is called if it is
149 in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be
150 initialised by calling fscache_object_init(object).
151
152
153 (*) FS-Cache operation record:
154
155 struct fscache_operation {
156 atomic_t usage;
157 struct fscache_object *object;
158 unsigned long flags;
159 #define FSCACHE_OP_EXCLUSIVE
160 void (*processor)(struct fscache_operation *op);
161 void (*release)(struct fscache_operation *op);
162 ...
163 };
164
165 FS-Cache has a pool of threads that it uses to give CPU time to the
166 various asynchronous operations that need to be done as part of driving
167 the cache. These are represented by the above structure. The processor
168 method is called to give the op CPU time, and the release method to get
169 rid of it when its usage count reaches 0.
170
171 An operation can be made exclusive upon an object by setting the
172 appropriate flag before enqueuing it with fscache_enqueue_operation(). If
173 an operation needs more processing time, it should be enqueued again.
174
175
176 (*) FS-Cache retrieval operation record:
177
178 struct fscache_retrieval {
179 struct fscache_operation op;
180 struct address_space *mapping;
181 struct list_head *to_do;
182 ...
183 };
184
185 A structure of this type is allocated by FS-Cache to record retrieval and
186 allocation requests made by the netfs. This struct is then passed to the
187 backend to do the operation. The backend may get extra refs to it by
188 calling fscache_get_retrieval() and refs may be discarded by calling
189 fscache_put_retrieval().
190
191 A retrieval operation can be used by the backend to do retrieval work. To
192 do this, the retrieval->op.processor method pointer should be set
193 appropriately by the backend and fscache_enqueue_retrieval() called to
194 submit it to the thread pool. CacheFiles, for example, uses this to queue
195 page examination when it detects PG_lock being cleared.
196
197 The to_do field is an empty list available for the cache backend to use as
198 it sees fit.
199
200
201 (*) FS-Cache storage operation record:
202
203 struct fscache_storage {
204 struct fscache_operation op;
205 pgoff_t store_limit;
206 ...
207 };
208
209 A structure of this type is allocated by FS-Cache to record outstanding
210 writes to be made. FS-Cache itself enqueues this operation and invokes
211 the write_page() method on the object at appropriate times to effect
212 storage.
213
214
215================
216CACHE OPERATIONS
217================
218
219The cache backend provides FS-Cache with a table of operations that can be
220performed on the denizens of the cache. These are held in a structure of type:
221
222 struct fscache_cache_ops
223
224 (*) Name of cache provider [mandatory]:
225
226 const char *name
227
228 This isn't strictly an operation, but should be pointed at a string naming
229 the backend.
230
231
232 (*) Allocate a new object [mandatory]:
233
234 struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
235 struct fscache_cookie *cookie)
236
237 This method is used to allocate a cache object representation to back a
238 cookie in a particular cache. fscache_object_init() should be called on
239 the object to initialise it prior to returning.
240
241 This function may also be used to parse the index key to be used for
242 multiple lookup calls to turn it into a more convenient form. FS-Cache
243 will call the lookup_complete() method to allow the cache to release the
244 form once lookup is complete or aborted.
245
246
247 (*) Look up and create object [mandatory]:
248
249 void (*lookup_object)(struct fscache_object *object)
250
251 This method is used to look up an object, given that the object is already
252 allocated and attached to the cookie. This should instantiate that object
253 in the cache if it can.
254
255 The method should call fscache_object_lookup_negative() as soon as
256 possible if it determines the object doesn't exist in the cache. If the
257 object is found to exist and the netfs indicates that it is valid then
258 fscache_obtained_object() should be called once the object is in a
259 position to have data stored in it. Similarly, fscache_obtained_object()
260 should also be called once a non-present object has been created.
261
262 If a lookup error occurs, fscache_object_lookup_error() should be called
263 to abort the lookup of that object.
264
265
266 (*) Release lookup data [mandatory]:
267
268 void (*lookup_complete)(struct fscache_object *object)
269
270 This method is called to ask the cache to release any resources it was
271 using to perform a lookup.
272
273
274 (*) Increment object refcount [mandatory]:
275
276 struct fscache_object *(*grab_object)(struct fscache_object *object)
277
278 This method is called to increment the reference count on an object. It
279 may fail (for instance if the cache is being withdrawn) by returning NULL.
280 It should return the object pointer if successful.
281
282
283 (*) Lock/Unlock object [mandatory]:
284
285 void (*lock_object)(struct fscache_object *object)
286 void (*unlock_object)(struct fscache_object *object)
287
288 These methods are used to exclusively lock an object. It must be possible
289 to schedule with the lock held, so a spinlock isn't sufficient.
290
291
292 (*) Pin/Unpin object [optional]:
293
294 int (*pin_object)(struct fscache_object *object)
295 void (*unpin_object)(struct fscache_object *object)
296
297 These methods are used to pin an object into the cache. Once pinned an
298 object cannot be reclaimed to make space. Return -ENOSPC if there's not
299 enough space in the cache to permit this.
300
301
302 (*) Update object [mandatory]:
303
304 int (*update_object)(struct fscache_object *object)
305
306 This is called to update the index entry for the specified object. The
307 new information should be in object->cookie->netfs_data. This can be
308 obtained by calling object->cookie->def->get_aux()/get_attr().
309
310
311 (*) Discard object [mandatory]:
312
313 void (*drop_object)(struct fscache_object *object)
314
315 This method is called to indicate that an object has been unbound from its
316 cookie, and that the cache should release the object's resources and
317 retire it if it's in state FSCACHE_OBJECT_RECYCLING.
318
319 This method should not attempt to release any references held by the
320 caller. The caller will invoke the put_object() method as appropriate.
321
322
323 (*) Release object reference [mandatory]:
324
325 void (*put_object)(struct fscache_object *object)
326
327 This method is used to discard a reference to an object. The object may
328 be freed when all the references to it are released.
329
330
331 (*) Synchronise a cache [mandatory]:
332
333 void (*sync)(struct fscache_cache *cache)
334
335 This is called to ask the backend to synchronise a cache with its backing
336 device.
337
338
339 (*) Dissociate a cache [mandatory]:
340
341 void (*dissociate_pages)(struct fscache_cache *cache)
342
343 This is called to ask a cache to perform any page dissociations as part of
344 cache withdrawal.
345
346
347 (*) Notification that the attributes on a netfs file changed [mandatory]:
348
349 int (*attr_changed)(struct fscache_object *object);
350
351 This is called to indicate to the cache that certain attributes on a netfs
352 file have changed (for example the maximum size a file may reach). The
353 cache can read these from the netfs by calling the cookie's get_attr()
354 method.
355
356 The cache may use the file size information to reserve space on the cache.
357 It should also call fscache_set_store_limit() to indicate to FS-Cache the
358 highest byte it's willing to store for an object.
359
360 This method may return -ve if an error occurred or the cache object cannot
361 be expanded. In such a case, the object will be withdrawn from service.
362
363 This operation is run asynchronously from FS-Cache's thread pool, and
364 storage and retrieval operations from the netfs are excluded during the
365 execution of this operation.
366
367
368 (*) Reserve cache space for an object's data [optional]:
369
370 int (*reserve_space)(struct fscache_object *object, loff_t size);
371
372 This is called to request that cache space be reserved to hold the data
373 for an object and the metadata used to track it. Zero size should be
374 taken as request to cancel a reservation.
375
376 This should return 0 if successful, -ENOSPC if there isn't enough space
377 available, or -ENOMEM or -EIO on other errors.
378
379 The reservation may exceed the current size of the object, thus permitting
380 future expansion. If the amount of space consumed by an object would
381 exceed the reservation, it's permitted to refuse requests to allocate
382 pages, but not required. An object may be pruned down to its reservation
383 size if larger than that already.
384
385
386 (*) Request page be read from cache [mandatory]:
387
388 int (*read_or_alloc_page)(struct fscache_retrieval *op,
389 struct page *page,
390 gfp_t gfp)
391
392 This is called to attempt to read a netfs page from the cache, or to
393 reserve a backing block if not. FS-Cache will have done as much checking
394 as it can before calling, but most of the work belongs to the backend.
395
396 If there's no page in the cache, then -ENODATA should be returned if the
397 backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it
398 didn't.
399
400 If there is suitable data in the cache, then a read operation should be
401 queued and 0 returned. When the read finishes, fscache_end_io() should be
402 called.
403
404 The fscache_mark_pages_cached() should be called for the page if any cache
405 metadata is retained. This will indicate to the netfs that the page needs
406 explicit uncaching. This operation takes a pagevec, thus allowing several
407 pages to be marked at once.
408
409 The retrieval record pointed to by op should be retained for each page
410 queued and released when I/O on the page has been formally ended.
411 fscache_get/put_retrieval() are available for this purpose.
412
413 The retrieval record may be used to get CPU time via the FS-Cache thread
414 pool. If this is desired, the op->op.processor should be set to point to
415 the appropriate processing routine, and fscache_enqueue_retrieval() should
416 be called at an appropriate point to request CPU time. For instance, the
417 retrieval routine could be enqueued upon the completion of a disk read.
418 The to_do field in the retrieval record is provided to aid in this.
419
420 If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
421 returned if possible or fscache_end_io() called with a suitable error
422 code..
423
424
425 (*) Request pages be read from cache [mandatory]:
426
427 int (*read_or_alloc_pages)(struct fscache_retrieval *op,
428 struct list_head *pages,
429 unsigned *nr_pages,
430 gfp_t gfp)
431
432 This is like the read_or_alloc_page() method, except it is handed a list
433 of pages instead of one page. Any pages on which a read operation is
434 started must be added to the page cache for the specified mapping and also
435 to the LRU. Such pages must also be removed from the pages list and
436 *nr_pages decremented per page.
437
438 If there was an error such as -ENOMEM, then that should be returned; else
439 if one or more pages couldn't be read or allocated, then -ENOBUFS should
440 be returned; else if one or more pages couldn't be read, then -ENODATA
441 should be returned. If all the pages are dispatched then 0 should be
442 returned.
443
444
445 (*) Request page be allocated in the cache [mandatory]:
446
447 int (*allocate_page)(struct fscache_retrieval *op,
448 struct page *page,
449 gfp_t gfp)
450
451 This is like the read_or_alloc_page() method, except that it shouldn't
452 read from the cache, even if there's data there that could be retrieved.
453 It should, however, set up any internal metadata required such that
454 the write_page() method can write to the cache.
455
456 If there's no backing block available, then -ENOBUFS should be returned
457 (or -ENOMEM if there were other problems). If a block is successfully
458 allocated, then the netfs page should be marked and 0 returned.
459
460
461 (*) Request pages be allocated in the cache [mandatory]:
462
463 int (*allocate_pages)(struct fscache_retrieval *op,
464 struct list_head *pages,
465 unsigned *nr_pages,
466 gfp_t gfp)
467
468 This is an multiple page version of the allocate_page() method. pages and
469 nr_pages should be treated as for the read_or_alloc_pages() method.
470
471
472 (*) Request page be written to cache [mandatory]:
473
474 int (*write_page)(struct fscache_storage *op,
475 struct page *page);
476
477 This is called to write from a page on which there was a previously
478 successful read_or_alloc_page() call or similar. FS-Cache filters out
479 pages that don't have mappings.
480
481 This method is called asynchronously from the FS-Cache thread pool. It is
482 not required to actually store anything, provided -ENODATA is then
483 returned to the next read of this page.
484
485 If an error occurred, then a negative error code should be returned,
486 otherwise zero should be returned. FS-Cache will take appropriate action
487 in response to an error, such as withdrawing this object.
488
489 If this method returns success then FS-Cache will inform the netfs
490 appropriately.
491
492
493 (*) Discard retained per-page metadata [mandatory]:
494
495 void (*uncache_page)(struct fscache_object *object, struct page *page)
496
497 This is called when a netfs page is being evicted from the pagecache. The
498 cache backend should tear down any internal representation or tracking it
499 maintains for this page.
500
501
502==================
503FS-CACHE UTILITIES
504==================
505
506FS-Cache provides some utilities that a cache backend may make use of:
507
508 (*) Note occurrence of an I/O error in a cache:
509
510 void fscache_io_error(struct fscache_cache *cache)
511
512 This tells FS-Cache that an I/O error occurred in the cache. After this
513 has been called, only resource dissociation operations (object and page
514 release) will be passed from the netfs to the cache backend for the
515 specified cache.
516
517 This does not actually withdraw the cache. That must be done separately.
518
519
520 (*) Invoke the retrieval I/O completion function:
521
522 void fscache_end_io(struct fscache_retrieval *op, struct page *page,
523 int error);
524
525 This is called to note the end of an attempt to retrieve a page. The
526 error value should be 0 if successful and an error otherwise.
527
528
529 (*) Set highest store limit:
530
531 void fscache_set_store_limit(struct fscache_object *object,
532 loff_t i_size);
533
534 This sets the limit FS-Cache imposes on the highest byte it's willing to
535 try and store for a netfs. Any page over this limit is automatically
536 rejected by fscache_read_alloc_page() and co with -ENOBUFS.
537
538
539 (*) Mark pages as being cached:
540
541 void fscache_mark_pages_cached(struct fscache_retrieval *op,
542 struct pagevec *pagevec);
543
544 This marks a set of pages as being cached. After this has been called,
545 the netfs must call fscache_uncache_page() to unmark the pages.
546
547
548 (*) Perform coherency check on an object:
549
550 enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
551 const void *data,
552 uint16_t datalen);
553
554 This asks the netfs to perform a coherency check on an object that has
555 just been looked up. The cookie attached to the object will determine the
556 netfs to use. data and datalen should specify where the auxiliary data
557 retrieved from the cache can be found.
558
559 One of three values will be returned:
560
561 (*) FSCACHE_CHECKAUX_OKAY
562
563 The coherency data indicates the object is valid as is.
564
565 (*) FSCACHE_CHECKAUX_NEEDS_UPDATE
566
567 The coherency data needs updating, but otherwise the object is
568 valid.
569
570 (*) FSCACHE_CHECKAUX_OBSOLETE
571
572 The coherency data indicates that the object is obsolete and should
573 be discarded.
574
575
576 (*) Initialise a freshly allocated object:
577
578 void fscache_object_init(struct fscache_object *object);
579
580 This initialises all the fields in an object representation.
581
582
583 (*) Indicate the destruction of an object:
584
585 void fscache_object_destroyed(struct fscache_cache *cache);
586
587 This must be called to inform FS-Cache that an object that belonged to a
588 cache has been destroyed and deallocated. This will allow continuation
589 of the cache withdrawal process when it is stopped pending destruction of
590 all the objects.
591
592
593 (*) Indicate negative lookup on an object:
594
595 void fscache_object_lookup_negative(struct fscache_object *object);
596
597 This is called to indicate to FS-Cache that a lookup process for an object
598 found a negative result.
599
600 This changes the state of an object to permit reads pending on lookup
601 completion to go off and start fetching data from the netfs server as it's
602 known at this point that there can't be any data in the cache.
603
604 This may be called multiple times on an object. Only the first call is
605 significant - all subsequent calls are ignored.
606
607
608 (*) Indicate an object has been obtained:
609
610 void fscache_obtained_object(struct fscache_object *object);
611
612 This is called to indicate to FS-Cache that a lookup process for an object
613 produced a positive result, or that an object was created. This should
614 only be called once for any particular object.
615
616 This changes the state of an object to indicate:
617
618 (1) if no call to fscache_object_lookup_negative() has been made on
619 this object, that there may be data available, and that reads can
620 now go and look for it; and
621
622 (2) that writes may now proceed against this object.
623
624
625 (*) Indicate that object lookup failed:
626
627 void fscache_object_lookup_error(struct fscache_object *object);
628
629 This marks an object as having encountered a fatal error (usually EIO)
630 and causes it to move into a state whereby it will be withdrawn as soon
631 as possible.
632
633
634 (*) Get and release references on a retrieval record:
635
636 void fscache_get_retrieval(struct fscache_retrieval *op);
637 void fscache_put_retrieval(struct fscache_retrieval *op);
638
639 These two functions are used to retain a retrieval record whilst doing
640 asynchronous data retrieval and block allocation.
641
642
643 (*) Enqueue a retrieval record for processing.
644
645 void fscache_enqueue_retrieval(struct fscache_retrieval *op);
646
647 This enqueues a retrieval record for processing by the FS-Cache thread
648 pool. One of the threads in the pool will invoke the retrieval record's
649 op->op.processor callback function. This function may be called from
650 within the callback function.
651
652
653 (*) List of object state names:
654
655 const char *fscache_object_states[];
656
657 For debugging purposes, this may be used to turn the state that an object
658 is in into a text string for display purposes.
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt
new file mode 100644
index 000000000000..748a1ae49e12
--- /dev/null
+++ b/Documentation/filesystems/caching/cachefiles.txt
@@ -0,0 +1,501 @@
1 ===============================================
2 CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
3 ===============================================
4
5Contents:
6
7 (*) Overview.
8
9 (*) Requirements.
10
11 (*) Configuration.
12
13 (*) Starting the cache.
14
15 (*) Things to avoid.
16
17 (*) Cache culling.
18
19 (*) Cache structure.
20
21 (*) Security model and SELinux.
22
23 (*) A note on security.
24
25 (*) Statistical information.
26
27 (*) Debugging.
28
29
30========
31OVERVIEW
32========
33
34CacheFiles is a caching backend that's meant to use as a cache a directory on
35an already mounted filesystem of a local type (such as Ext3).
36
37CacheFiles uses a userspace daemon to do some of the cache management - such as
38reaping stale nodes and culling. This is called cachefilesd and lives in
39/sbin.
40
41The filesystem and data integrity of the cache are only as good as those of the
42filesystem providing the backing services. Note that CacheFiles does not
43attempt to journal anything since the journalling interfaces of the various
44filesystems are very specific in nature.
45
46CacheFiles creates a misc character device - "/dev/cachefiles" - that is used
47to communication with the daemon. Only one thing may have this open at once,
48and whilst it is open, a cache is at least partially in existence. The daemon
49opens this and sends commands down it to control the cache.
50
51CacheFiles is currently limited to a single cache.
52
53CacheFiles attempts to maintain at least a certain percentage of free space on
54the filesystem, shrinking the cache by culling the objects it contains to make
55space if necessary - see the "Cache Culling" section. This means it can be
56placed on the same medium as a live set of data, and will expand to make use of
57spare space and automatically contract when the set of data requires more
58space.
59
60
61============
62REQUIREMENTS
63============
64
65The use of CacheFiles and its daemon requires the following features to be
66available in the system and in the cache filesystem:
67
68 - dnotify.
69
70 - extended attributes (xattrs).
71
72 - openat() and friends.
73
74 - bmap() support on files in the filesystem (FIBMAP ioctl).
75
76 - The use of bmap() to detect a partial page at the end of the file.
77
78It is strongly recommended that the "dir_index" option is enabled on Ext3
79filesystems being used as a cache.
80
81
82=============
83CONFIGURATION
84=============
85
86The cache is configured by a script in /etc/cachefilesd.conf. These commands
87set up cache ready for use. The following script commands are available:
88
89 (*) brun <N>%
90 (*) bcull <N>%
91 (*) bstop <N>%
92 (*) frun <N>%
93 (*) fcull <N>%
94 (*) fstop <N>%
95
96 Configure the culling limits. Optional. See the section on culling
97 The defaults are 7% (run), 5% (cull) and 1% (stop) respectively.
98
99 The commands beginning with a 'b' are file space (block) limits, those
100 beginning with an 'f' are file count limits.
101
102 (*) dir <path>
103
104 Specify the directory containing the root of the cache. Mandatory.
105
106 (*) tag <name>
107
108 Specify a tag to FS-Cache to use in distinguishing multiple caches.
109 Optional. The default is "CacheFiles".
110
111 (*) debug <mask>
112
113 Specify a numeric bitmask to control debugging in the kernel module.
114 Optional. The default is zero (all off). The following values can be
115 OR'd into the mask to collect various information:
116
117 1 Turn on trace of function entry (_enter() macros)
118 2 Turn on trace of function exit (_leave() macros)
119 4 Turn on trace of internal debug points (_debug())
120
121 This mask can also be set through sysfs, eg:
122
123 echo 5 >/sys/modules/cachefiles/parameters/debug
124
125
126==================
127STARTING THE CACHE
128==================
129
130The cache is started by running the daemon. The daemon opens the cache device,
131configures the cache and tells it to begin caching. At that point the cache
132binds to fscache and the cache becomes live.
133
134The daemon is run as follows:
135
136 /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
137
138The flags are:
139
140 (*) -d
141
142 Increase the debugging level. This can be specified multiple times and
143 is cumulative with itself.
144
145 (*) -s
146
147 Send messages to stderr instead of syslog.
148
149 (*) -n
150
151 Don't daemonise and go into background.
152
153 (*) -f <configfile>
154
155 Use an alternative configuration file rather than the default one.
156
157
158===============
159THINGS TO AVOID
160===============
161
162Do not mount other things within the cache as this will cause problems. The
163kernel module contains its own very cut-down path walking facility that ignores
164mountpoints, but the daemon can't avoid them.
165
166Do not create, rename or unlink files and directories in the cache whilst the
167cache is active, as this may cause the state to become uncertain.
168
169Renaming files in the cache might make objects appear to be other objects (the
170filename is part of the lookup key).
171
172Do not change or remove the extended attributes attached to cache files by the
173cache as this will cause the cache state management to get confused.
174
175Do not create files or directories in the cache, lest the cache get confused or
176serve incorrect data.
177
178Do not chmod files in the cache. The module creates things with minimal
179permissions to prevent random users being able to access them directly.
180
181
182=============
183CACHE CULLING
184=============
185
186The cache may need culling occasionally to make space. This involves
187discarding objects from the cache that have been used less recently than
188anything else. Culling is based on the access time of data objects. Empty
189directories are culled if not in use.
190
191Cache culling is done on the basis of the percentage of blocks and the
192percentage of files available in the underlying filesystem. There are six
193"limits":
194
195 (*) brun
196 (*) frun
197
198 If the amount of free space and the number of available files in the cache
199 rises above both these limits, then culling is turned off.
200
201 (*) bcull
202 (*) fcull
203
204 If the amount of available space or the number of available files in the
205 cache falls below either of these limits, then culling is started.
206
207 (*) bstop
208 (*) fstop
209
210 If the amount of available space or the number of available files in the
211 cache falls below either of these limits, then no further allocation of
212 disk space or files is permitted until culling has raised things above
213 these limits again.
214
215These must be configured thusly:
216
217 0 <= bstop < bcull < brun < 100
218 0 <= fstop < fcull < frun < 100
219
220Note that these are percentages of available space and available files, and do
221_not_ appear as 100 minus the percentage displayed by the "df" program.
222
223The userspace daemon scans the cache to build up a table of cullable objects.
224These are then culled in least recently used order. A new scan of the cache is
225started as soon as space is made in the table. Objects will be skipped if
226their atimes have changed or if the kernel module says it is still using them.
227
228
229===============
230CACHE STRUCTURE
231===============
232
233The CacheFiles module will create two directories in the directory it was
234given:
235
236 (*) cache/
237
238 (*) graveyard/
239
240The active cache objects all reside in the first directory. The CacheFiles
241kernel module moves any retired or culled objects that it can't simply unlink
242to the graveyard from which the daemon will actually delete them.
243
244The daemon uses dnotify to monitor the graveyard directory, and will delete
245anything that appears therein.
246
247
248The module represents index objects as directories with the filename "I..." or
249"J...". Note that the "cache/" directory is itself a special index.
250
251Data objects are represented as files if they have no children, or directories
252if they do. Their filenames all begin "D..." or "E...". If represented as a
253directory, data objects will have a file in the directory called "data" that
254actually holds the data.
255
256Special objects are similar to data objects, except their filenames begin
257"S..." or "T...".
258
259
260If an object has children, then it will be represented as a directory.
261Immediately in the representative directory are a collection of directories
262named for hash values of the child object keys with an '@' prepended. Into
263this directory, if possible, will be placed the representations of the child
264objects:
265
266 INDEX INDEX INDEX DATA FILES
267 ========= ========== ================================= ================
268 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
269 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
270 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
271 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
272
273
274If the key is so long that it exceeds NAME_MAX with the decorations added on to
275it, then it will be cut into pieces, the first few of which will be used to
276make a nest of directories, and the last one of which will be the objects
277inside the last directory. The names of the intermediate directories will have
278'+' prepended:
279
280 J1223/@23/+xy...z/+kl...m/Epqr
281
282
283Note that keys are raw data, and not only may they exceed NAME_MAX in size,
284they may also contain things like '/' and NUL characters, and so they may not
285be suitable for turning directly into a filename.
286
287To handle this, CacheFiles will use a suitably printable filename directly and
288"base-64" encode ones that aren't directly suitable. The two versions of
289object filenames indicate the encoding:
290
291 OBJECT TYPE PRINTABLE ENCODED
292 =============== =============== ===============
293 Index "I..." "J..."
294 Data "D..." "E..."
295 Special "S..." "T..."
296
297Intermediate directories are always "@" or "+" as appropriate.
298
299
300Each object in the cache has an extended attribute label that holds the object
301type ID (required to distinguish special objects) and the auxiliary data from
302the netfs. The latter is used to detect stale objects in the cache and update
303or retire them.
304
305
306Note that CacheFiles will erase from the cache any file it doesn't recognise or
307any file of an incorrect type (such as a FIFO file or a device file).
308
309
310==========================
311SECURITY MODEL AND SELINUX
312==========================
313
314CacheFiles is implemented to deal properly with the LSM security features of
315the Linux kernel and the SELinux facility.
316
317One of the problems that CacheFiles faces is that it is generally acting on
318behalf of a process, and running in that process's context, and that includes a
319security context that is not appropriate for accessing the cache - either
320because the files in the cache are inaccessible to that process, or because if
321the process creates a file in the cache, that file may be inaccessible to other
322processes.
323
324The way CacheFiles works is to temporarily change the security context (fsuid,
325fsgid and actor security label) that the process acts as - without changing the
326security context of the process when it the target of an operation performed by
327some other process (so signalling and suchlike still work correctly).
328
329
330When the CacheFiles module is asked to bind to its cache, it:
331
332 (1) Finds the security label attached to the root cache directory and uses
333 that as the security label with which it will create files. By default,
334 this is:
335
336 cachefiles_var_t
337
338 (2) Finds the security label of the process which issued the bind request
339 (presumed to be the cachefilesd daemon), which by default will be:
340
341 cachefilesd_t
342
343 and asks LSM to supply a security ID as which it should act given the
344 daemon's label. By default, this will be:
345
346 cachefiles_kernel_t
347
348 SELinux transitions the daemon's security ID to the module's security ID
349 based on a rule of this form in the policy.
350
351 type_transition <daemon's-ID> kernel_t : process <module's-ID>;
352
353 For instance:
354
355 type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t;
356
357
358The module's security ID gives it permission to create, move and remove files
359and directories in the cache, to find and access directories and files in the
360cache, to set and access extended attributes on cache objects, and to read and
361write files in the cache.
362
363The daemon's security ID gives it only a very restricted set of permissions: it
364may scan directories, stat files and erase files and directories. It may
365not read or write files in the cache, and so it is precluded from accessing the
366data cached therein; nor is it permitted to create new files in the cache.
367
368
369There are policy source files available in:
370
371 http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2
372
373and later versions. In that tarball, see the files:
374
375 cachefilesd.te
376 cachefilesd.fc
377 cachefilesd.if
378
379They are built and installed directly by the RPM.
380
381If a non-RPM based system is being used, then copy the above files to their own
382directory and run:
383
384 make -f /usr/share/selinux/devel/Makefile
385 semodule -i cachefilesd.pp
386
387You will need checkpolicy and selinux-policy-devel installed prior to the
388build.
389
390
391By default, the cache is located in /var/fscache, but if it is desirable that
392it should be elsewhere, than either the above policy files must be altered, or
393an auxiliary policy must be installed to label the alternate location of the
394cache.
395
396For instructions on how to add an auxiliary policy to enable the cache to be
397located elsewhere when SELinux is in enforcing mode, please see:
398
399 /usr/share/doc/cachefilesd-*/move-cache.txt
400
401When the cachefilesd rpm is installed; alternatively, the document can be found
402in the sources.
403
404
405==================
406A NOTE ON SECURITY
407==================
408
409CacheFiles makes use of the split security in the task_struct. It allocates
410its own task_security structure, and redirects current->cred to point to it
411when it acts on behalf of another process, in that process's context.
412
413The reason it does this is that it calls vfs_mkdir() and suchlike rather than
414bypassing security and calling inode ops directly. Therefore the VFS and LSM
415may deny the CacheFiles access to the cache data because under some
416circumstances the caching code is running in the security context of whatever
417process issued the original syscall on the netfs.
418
419Furthermore, should CacheFiles create a file or directory, the security
420parameters with that object is created (UID, GID, security label) would be
421derived from that process that issued the system call, thus potentially
422preventing other processes from accessing the cache - including CacheFiles's
423cache management daemon (cachefilesd).
424
425What is required is to temporarily override the security of the process that
426issued the system call. We can't, however, just do an in-place change of the
427security data as that affects the process as an object, not just as a subject.
428This means it may lose signals or ptrace events for example, and affects what
429the process looks like in /proc.
430
431So CacheFiles makes use of a logical split in the security between the
432objective security (task->real_cred) and the subjective security (task->cred).
433The objective security holds the intrinsic security properties of a process and
434is never overridden. This is what appears in /proc, and is what is used when a
435process is the target of an operation by some other process (SIGKILL for
436example).
437
438The subjective security holds the active security properties of a process, and
439may be overridden. This is not seen externally, and is used whan a process
440acts upon another object, for example SIGKILLing another process or opening a
441file.
442
443LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request
444for CacheFiles to run in a context of a specific security label, or to create
445files and directories with another security label.
446
447
448=======================
449STATISTICAL INFORMATION
450=======================
451
452If FS-Cache is compiled with the following option enabled:
453
454 CONFIG_CACHEFILES_HISTOGRAM=y
455
456then it will gather certain statistics and display them through a proc file.
457
458 (*) /proc/fs/cachefiles/histogram
459
460 cat /proc/fs/cachefiles/histogram
461 JIFS SECS LOOKUPS MKDIRS CREATES
462 ===== ===== ========= ========= =========
463
464 This shows the breakdown of the number of times each amount of time
465 between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
466 columns are as follows:
467
468 COLUMN TIME MEASUREMENT
469 ======= =======================================================
470 LOOKUPS Length of time to perform a lookup on the backing fs
471 MKDIRS Length of time to perform a mkdir on the backing fs
472 CREATES Length of time to perform a create on the backing fs
473
474 Each row shows the number of events that took a particular range of times.
475 Each step is 1 jiffy in size. The JIFS column indicates the particular
476 jiffy range covered, and the SECS field the equivalent number of seconds.
477
478
479=========
480DEBUGGING
481=========
482
483If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime
484debugging enabled by adjusting the value in:
485
486 /sys/module/cachefiles/parameters/debug
487
488This is a bitmask of debugging streams to enable:
489
490 BIT VALUE STREAM POINT
491 ======= ======= =============================== =======================
492 0 1 General Function entry trace
493 1 2 Function exit trace
494 2 4 General
495
496The appropriate set of values should be OR'd together and the result written to
497the control file. For example:
498
499 echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
500
501will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
new file mode 100644
index 000000000000..9e94b9491d89
--- /dev/null
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -0,0 +1,333 @@
1 ==========================
2 General Filesystem Caching
3 ==========================
4
5========
6OVERVIEW
7========
8
9This facility is a general purpose cache for network filesystems, though it
10could be used for caching other things such as ISO9660 filesystems too.
11
12FS-Cache mediates between cache backends (such as CacheFS) and network
13filesystems:
14
15 +---------+
16 | | +--------------+
17 | NFS |--+ | |
18 | | | +-->| CacheFS |
19 +---------+ | +----------+ | | /dev/hda5 |
20 | | | | +--------------+
21 +---------+ +-->| | |
22 | | | |--+
23 | AFS |----->| FS-Cache |
24 | | | |--+
25 +---------+ +-->| | |
26 | | | | +--------------+
27 +---------+ | +----------+ | | |
28 | | | +-->| CacheFiles |
29 | ISOFS |--+ | /var/cache |
30 | | +--------------+
31 +---------+
32
33Or to look at it another way, FS-Cache is a module that provides a caching
34facility to a network filesystem such that the cache is transparent to the
35user:
36
37 +---------+
38 | |
39 | Server |
40 | |
41 +---------+
42 | NETWORK
43 ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
44 |
45 | +----------+
46 V | |
47 +---------+ | |
48 | | | |
49 | NFS |----->| FS-Cache |
50 | | | |--+
51 +---------+ | | | +--------------+ +--------------+
52 | | | | | | | |
53 V +----------+ +-->| CacheFiles |-->| Ext3 |
54 +---------+ | /var/cache | | /dev/sda6 |
55 | | +--------------+ +--------------+
56 | VFS | ^ ^
57 | | | |
58 +---------+ +--------------+ |
59 | KERNEL SPACE | |
60 ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
61 | USER SPACE | |
62 V | |
63 +---------+ +--------------+
64 | | | |
65 | Process | | cachefilesd |
66 | | | |
67 +---------+ +--------------+
68
69
70FS-Cache does not follow the idea of completely loading every netfs file
71opened in its entirety into a cache before permitting it to be accessed and
72then serving the pages out of that cache rather than the netfs inode because:
73
74 (1) It must be practical to operate without a cache.
75
76 (2) The size of any accessible file must not be limited to the size of the
77 cache.
78
79 (3) The combined size of all opened files (this includes mapped libraries)
80 must not be limited to the size of the cache.
81
82 (4) The user should not be forced to download an entire file just to do a
83 one-off access of a small portion of it (such as might be done with the
84 "file" program).
85
86It instead serves the cache out in PAGE_SIZE chunks as and when requested by
87the netfs('s) using it.
88
89
90FS-Cache provides the following facilities:
91
92 (1) More than one cache can be used at once. Caches can be selected
93 explicitly by use of tags.
94
95 (2) Caches can be added / removed at any time.
96
97 (3) The netfs is provided with an interface that allows either party to
98 withdraw caching facilities from a file (required for (2)).
99
100 (4) The interface to the netfs returns as few errors as possible, preferring
101 rather to let the netfs remain oblivious.
102
103 (5) Cookies are used to represent indices, files and other objects to the
104 netfs. The simplest cookie is just a NULL pointer - indicating nothing
105 cached there.
106
107 (6) The netfs is allowed to propose - dynamically - any index hierarchy it
108 desires, though it must be aware that the index search function is
109 recursive, stack space is limited, and indices can only be children of
110 indices.
111
112 (7) Data I/O is done direct to and from the netfs's pages. The netfs
113 indicates that page A is at index B of the data-file represented by cookie
114 C, and that it should be read or written. The cache backend may or may
115 not start I/O on that page, but if it does, a netfs callback will be
116 invoked to indicate completion. The I/O may be either synchronous or
117 asynchronous.
118
119 (8) Cookies can be "retired" upon release. At this point FS-Cache will mark
120 them as obsolete and the index hierarchy rooted at that point will get
121 recycled.
122
123 (9) The netfs provides a "match" function for index searches. In addition to
124 saying whether a match was made or not, this can also specify that an
125 entry should be updated or deleted.
126
127(10) As much as possible is done asynchronously.
128
129
130FS-Cache maintains a virtual indexing tree in which all indices, files, objects
131and pages are kept. Bits of this tree may actually reside in one or more
132caches.
133
134 FSDEF
135 |
136 +------------------------------------+
137 | |
138 NFS AFS
139 | |
140 +--------------------------+ +-----------+
141 | | | |
142 homedir mirror afs.org redhat.com
143 | | |
144 +------------+ +---------------+ +----------+
145 | | | | | |
146 00001 00002 00007 00125 vol00001 vol00002
147 | | | | |
148 +---+---+ +-----+ +---+ +------+------+ +-----+----+
149 | | | | | | | | | | | | |
150PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
151 | |
152 PG0 +-------+
153 | |
154 00001 00003
155 |
156 +---+---+
157 | | |
158 PG0 PG1 PG2
159
160In the example above, you can see two netfs's being backed: NFS and AFS. These
161have different index hierarchies:
162
163 (*) The NFS primary index contains per-server indices. Each server index is
164 indexed by NFS file handles to get data file objects. Each data file
165 objects can have an array of pages, but may also have further child
166 objects, such as extended attributes and directory entries. Extended
167 attribute objects themselves have page-array contents.
168
169 (*) The AFS primary index contains per-cell indices. Each cell index contains
170 per-logical-volume indices. Each of volume index contains up to three
171 indices for the read-write, read-only and backup mirrors of those volumes.
172 Each of these contains vnode data file objects, each of which contains an
173 array of pages.
174
175The very top index is the FS-Cache master index in which individual netfs's
176have entries.
177
178Any index object may reside in more than one cache, provided it only has index
179children. Any index with non-index object children will be assumed to only
180reside in one cache.
181
182
183The netfs API to FS-Cache can be found in:
184
185 Documentation/filesystems/caching/netfs-api.txt
186
187The cache backend API to FS-Cache can be found in:
188
189 Documentation/filesystems/caching/backend-api.txt
190
191A description of the internal representations and object state machine can be
192found in:
193
194 Documentation/filesystems/caching/object.txt
195
196
197=======================
198STATISTICAL INFORMATION
199=======================
200
201If FS-Cache is compiled with the following options enabled:
202
203 CONFIG_FSCACHE_STATS=y
204 CONFIG_FSCACHE_HISTOGRAM=y
205
206then it will gather certain statistics and display them through a number of
207proc files.
208
209 (*) /proc/fs/fscache/stats
210
211 This shows counts of a number of events that can happen in FS-Cache:
212
213 CLASS EVENT MEANING
214 ======= ======= =======================================================
215 Cookies idx=N Number of index cookies allocated
216 dat=N Number of data storage cookies allocated
217 spc=N Number of special cookies allocated
218 Objects alc=N Number of objects allocated
219 nal=N Number of object allocation failures
220 avl=N Number of objects that reached the available state
221 ded=N Number of objects that reached the dead state
222 ChkAux non=N Number of objects that didn't have a coherency check
223 ok=N Number of objects that passed a coherency check
224 upd=N Number of objects that needed a coherency data update
225 obs=N Number of objects that were declared obsolete
226 Pages mrk=N Number of pages marked as being cached
227 unc=N Number of uncache page requests seen
228 Acquire n=N Number of acquire cookie requests seen
229 nul=N Number of acq reqs given a NULL parent
230 noc=N Number of acq reqs rejected due to no cache available
231 ok=N Number of acq reqs succeeded
232 nbf=N Number of acq reqs rejected due to error
233 oom=N Number of acq reqs failed on ENOMEM
234 Lookups n=N Number of lookup calls made on cache backends
235 neg=N Number of negative lookups made
236 pos=N Number of positive lookups made
237 crt=N Number of objects created by lookup
238 Updates n=N Number of update cookie requests seen
239 nul=N Number of upd reqs given a NULL parent
240 run=N Number of upd reqs granted CPU time
241 Relinqs n=N Number of relinquish cookie requests seen
242 nul=N Number of rlq reqs given a NULL parent
243 wcr=N Number of rlq reqs waited on completion of creation
244 AttrChg n=N Number of attribute changed requests seen
245 ok=N Number of attr changed requests queued
246 nbf=N Number of attr changed rejected -ENOBUFS
247 oom=N Number of attr changed failed -ENOMEM
248 run=N Number of attr changed ops given CPU time
249 Allocs n=N Number of allocation requests seen
250 ok=N Number of successful alloc reqs
251 wt=N Number of alloc reqs that waited on lookup completion
252 nbf=N Number of alloc reqs rejected -ENOBUFS
253 ops=N Number of alloc reqs submitted
254 owt=N Number of alloc reqs waited for CPU time
255 Retrvls n=N Number of retrieval (read) requests seen
256 ok=N Number of successful retr reqs
257 wt=N Number of retr reqs that waited on lookup completion
258 nod=N Number of retr reqs returned -ENODATA
259 nbf=N Number of retr reqs rejected -ENOBUFS
260 int=N Number of retr reqs aborted -ERESTARTSYS
261 oom=N Number of retr reqs failed -ENOMEM
262 ops=N Number of retr reqs submitted
263 owt=N Number of retr reqs waited for CPU time
264 Stores n=N Number of storage (write) requests seen
265 ok=N Number of successful store reqs
266 agn=N Number of store reqs on a page already pending storage
267 nbf=N Number of store reqs rejected -ENOBUFS
268 oom=N Number of store reqs failed -ENOMEM
269 ops=N Number of store reqs submitted
270 run=N Number of store reqs granted CPU time
271 Ops pend=N Number of times async ops added to pending queues
272 run=N Number of times async ops given CPU time
273 enq=N Number of times async ops queued for processing
274 dfr=N Number of async ops queued for deferred release
275 rel=N Number of async ops released
276 gc=N Number of deferred-release async ops garbage collected
277
278
279 (*) /proc/fs/fscache/histogram
280
281 cat /proc/fs/fscache/histogram
282 JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
283 ===== ===== ========= ========= ========= ========= =========
284
285 This shows the breakdown of the number of times each amount of time
286 between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
287 columns are as follows:
288
289 COLUMN TIME MEASUREMENT
290 ======= =======================================================
291 OBJ INST Length of time to instantiate an object
292 OP RUNS Length of time a call to process an operation took
293 OBJ RUNS Length of time a call to process an object event took
294 RETRV DLY Time between an requesting a read and lookup completing
295 RETRIEVLS Time between beginning and end of a retrieval
296
297 Each row shows the number of events that took a particular range of times.
298 Each step is 1 jiffy in size. The JIFS column indicates the particular
299 jiffy range covered, and the SECS field the equivalent number of seconds.
300
301
302=========
303DEBUGGING
304=========
305
306If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
307debugging enabled by adjusting the value in:
308
309 /sys/module/fscache/parameters/debug
310
311This is a bitmask of debugging streams to enable:
312
313 BIT VALUE STREAM POINT
314 ======= ======= =============================== =======================
315 0 1 Cache management Function entry trace
316 1 2 Function exit trace
317 2 4 General
318 3 8 Cookie management Function entry trace
319 4 16 Function exit trace
320 5 32 General
321 6 64 Page handling Function entry trace
322 7 128 Function exit trace
323 8 256 General
324 9 512 Operation management Function entry trace
325 10 1024 Function exit trace
326 11 2048 General
327
328The appropriate set of values should be OR'd together and the result written to
329the control file. For example:
330
331 echo $((1|8|64)) >/sys/module/fscache/parameters/debug
332
333will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
new file mode 100644
index 000000000000..2666b1ed5e9e
--- /dev/null
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -0,0 +1,778 @@
1 ===============================
2 FS-CACHE NETWORK FILESYSTEM API
3 ===============================
4
5There's an API by which a network filesystem can make use of the FS-Cache
6facilities. This is based around a number of principles:
7
8 (1) Caches can store a number of different object types. There are two main
9 object types: indices and files. The first is a special type used by
10 FS-Cache to make finding objects faster and to make retiring of groups of
11 objects easier.
12
13 (2) Every index, file or other object is represented by a cookie. This cookie
14 may or may not have anything associated with it, but the netfs doesn't
15 need to care.
16
17 (3) Barring the top-level index (one entry per cached netfs), the index
18 hierarchy for each netfs is structured according the whim of the netfs.
19
20This API is declared in <linux/fscache.h>.
21
22This document contains the following sections:
23
24 (1) Network filesystem definition
25 (2) Index definition
26 (3) Object definition
27 (4) Network filesystem (un)registration
28 (5) Cache tag lookup
29 (6) Index registration
30 (7) Data file registration
31 (8) Miscellaneous object registration
32 (9) Setting the data file size
33 (10) Page alloc/read/write
34 (11) Page uncaching
35 (12) Index and data file update
36 (13) Miscellaneous cookie operations
37 (14) Cookie unregistration
38 (15) Index and data file invalidation
39 (16) FS-Cache specific page flags.
40
41
42=============================
43NETWORK FILESYSTEM DEFINITION
44=============================
45
46FS-Cache needs a description of the network filesystem. This is specified
47using a record of the following structure:
48
49 struct fscache_netfs {
50 uint32_t version;
51 const char *name;
52 struct fscache_cookie *primary_index;
53 ...
54 };
55
56This first two fields should be filled in before registration, and the third
57will be filled in by the registration function; any other fields should just be
58ignored and are for internal use only.
59
60The fields are:
61
62 (1) The name of the netfs (used as the key in the toplevel index).
63
64 (2) The version of the netfs (if the name matches but the version doesn't, the
65 entire in-cache hierarchy for this netfs will be scrapped and begun
66 afresh).
67
68 (3) The cookie representing the primary index will be allocated according to
69 another parameter passed into the registration function.
70
71For example, kAFS (linux/fs/afs/) uses the following definitions to describe
72itself:
73
74 struct fscache_netfs afs_cache_netfs = {
75 .version = 0,
76 .name = "afs",
77 };
78
79
80================
81INDEX DEFINITION
82================
83
84Indices are used for two purposes:
85
86 (1) To aid the finding of a file based on a series of keys (such as AFS's
87 "cell", "volume ID", "vnode ID").
88
89 (2) To make it easier to discard a subset of all the files cached based around
90 a particular key - for instance to mirror the removal of an AFS volume.
91
92However, since it's unlikely that any two netfs's are going to want to define
93their index hierarchies in quite the same way, FS-Cache tries to impose as few
94restraints as possible on how an index is structured and where it is placed in
95the tree. The netfs can even mix indices and data files at the same level, but
96it's not recommended.
97
98Each index entry consists of a key of indeterminate length plus some auxilliary
99data, also of indeterminate length.
100
101There are some limits on indices:
102
103 (1) Any index containing non-index objects should be restricted to a single
104 cache. Any such objects created within an index will be created in the
105 first cache only. The cache in which an index is created can be
106 controlled by cache tags (see below).
107
108 (2) The entry data must be atomically journallable, so it is limited to about
109 400 bytes at present. At least 400 bytes will be available.
110
111 (3) The depth of the index tree should be judged with care as the search
112 function is recursive. Too many layers will run the kernel out of stack.
113
114
115=================
116OBJECT DEFINITION
117=================
118
119To define an object, a structure of the following type should be filled out:
120
121 struct fscache_cookie_def
122 {
123 uint8_t name[16];
124 uint8_t type;
125
126 struct fscache_cache_tag *(*select_cache)(
127 const void *parent_netfs_data,
128 const void *cookie_netfs_data);
129
130 uint16_t (*get_key)(const void *cookie_netfs_data,
131 void *buffer,
132 uint16_t bufmax);
133
134 void (*get_attr)(const void *cookie_netfs_data,
135 uint64_t *size);
136
137 uint16_t (*get_aux)(const void *cookie_netfs_data,
138 void *buffer,
139 uint16_t bufmax);
140
141 enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
142 const void *data,
143 uint16_t datalen);
144
145 void (*get_context)(void *cookie_netfs_data, void *context);
146
147 void (*put_context)(void *cookie_netfs_data, void *context);
148
149 void (*mark_pages_cached)(void *cookie_netfs_data,
150 struct address_space *mapping,
151 struct pagevec *cached_pvec);
152
153 void (*now_uncached)(void *cookie_netfs_data);
154 };
155
156This has the following fields:
157
158 (1) The type of the object [mandatory].
159
160 This is one of the following values:
161
162 (*) FSCACHE_COOKIE_TYPE_INDEX
163
164 This defines an index, which is a special FS-Cache type.
165
166 (*) FSCACHE_COOKIE_TYPE_DATAFILE
167
168 This defines an ordinary data file.
169
170 (*) Any other value between 2 and 255
171
172 This defines an extraordinary object such as an XATTR.
173
174 (2) The name of the object type (NUL terminated unless all 16 chars are used)
175 [optional].
176
177 (3) A function to select the cache in which to store an index [optional].
178
179 This function is invoked when an index needs to be instantiated in a cache
180 during the instantiation of a non-index object. Only the immediate index
181 parent for the non-index object will be queried. Any indices above that
182 in the hierarchy may be stored in multiple caches. This function does not
183 need to be supplied for any non-index object or any index that will only
184 have index children.
185
186 If this function is not supplied or if it returns NULL then the first
187 cache in the parent's list will be chosen, or failing that, the first
188 cache in the master list.
189
190 (4) A function to retrieve an object's key from the netfs [mandatory].
191
192 This function will be called with the netfs data that was passed to the
193 cookie acquisition function and the maximum length of key data that it may
194 provide. It should write the required key data into the given buffer and
195 return the quantity it wrote.
196
197 (5) A function to retrieve attribute data from the netfs [optional].
198
199 This function will be called with the netfs data that was passed to the
200 cookie acquisition function. It should return the size of the file if
201 this is a data file. The size may be used to govern how much cache must
202 be reserved for this file in the cache.
203
204 If the function is absent, a file size of 0 is assumed.
205
206 (6) A function to retrieve auxilliary data from the netfs [optional].
207
208 This function will be called with the netfs data that was passed to the
209 cookie acquisition function and the maximum length of auxilliary data that
210 it may provide. It should write the auxilliary data into the given buffer
211 and return the quantity it wrote.
212
213 If this function is absent, the auxilliary data length will be set to 0.
214
215 The length of the auxilliary data buffer may be dependent on the key
216 length. A netfs mustn't rely on being able to provide more than 400 bytes
217 for both.
218
219 (7) A function to check the auxilliary data [optional].
220
221 This function will be called to check that a match found in the cache for
222 this object is valid. For instance with AFS it could check the auxilliary
223 data against the data version number returned by the server to determine
224 whether the index entry in a cache is still valid.
225
226 If this function is absent, it will be assumed that matching objects in a
227 cache are always valid.
228
229 If present, the function should return one of the following values:
230
231 (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is
232 (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update
233 (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted
234
235 This function can also be used to extract data from the auxilliary data in
236 the cache and copy it into the netfs's structures.
237
238 (8) A pair of functions to manage contexts for the completion callback
239 [optional].
240
241 The cache read/write functions are passed a context which is then passed
242 to the I/O completion callback function. To ensure this context remains
243 valid until after the I/O completion is called, two functions may be
244 provided: one to get an extra reference on the context, and one to drop a
245 reference to it.
246
247 If the context is not used or is a type of object that won't go out of
248 scope, then these functions are not required. These functions are not
249 required for indices as indices may not contain data. These functions may
250 be called in interrupt context and so may not sleep.
251
252 (9) A function to mark a page as retaining cache metadata [optional].
253
254 This is called by the cache to indicate that it is retaining in-memory
255 information for this page and that the netfs should uncache the page when
256 it has finished. This does not indicate whether there's data on the disk
257 or not. Note that several pages at once may be presented for marking.
258
259 The PG_fscache bit is set on the pages before this function would be
260 called, so the function need not be provided if this is sufficient.
261
262 This function is not required for indices as they're not permitted data.
263
264(10) A function to unmark all the pages retaining cache metadata [mandatory].
265
266 This is called by FS-Cache to indicate that a backing store is being
267 unbound from a cookie and that all the marks on the pages should be
268 cleared to prevent confusion. Note that the cache will have torn down all
269 its tracking information so that the pages don't need to be explicitly
270 uncached.
271
272 This function is not required for indices as they're not permitted data.
273
274
275===================================
276NETWORK FILESYSTEM (UN)REGISTRATION
277===================================
278
279The first step is to declare the network filesystem to the cache. This also
280involves specifying the layout of the primary index (for AFS, this would be the
281"cell" level).
282
283The registration function is:
284
285 int fscache_register_netfs(struct fscache_netfs *netfs);
286
287It just takes a pointer to the netfs definition. It returns 0 or an error as
288appropriate.
289
290For kAFS, registration is done as follows:
291
292 ret = fscache_register_netfs(&afs_cache_netfs);
293
294The last step is, of course, unregistration:
295
296 void fscache_unregister_netfs(struct fscache_netfs *netfs);
297
298
299================
300CACHE TAG LOOKUP
301================
302
303FS-Cache permits the use of more than one cache. To permit particular index
304subtrees to be bound to particular caches, the second step is to look up cache
305representation tags. This step is optional; it can be left entirely up to
306FS-Cache as to which cache should be used. The problem with doing that is that
307FS-Cache will always pick the first cache that was registered.
308
309To get the representation for a named tag:
310
311 struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name);
312
313This takes a text string as the name and returns a representation of a tag. It
314will never return an error. It may return a dummy tag, however, if it runs out
315of memory; this will inhibit caching with this tag.
316
317Any representation so obtained must be released by passing it to this function:
318
319 void fscache_release_cache_tag(struct fscache_cache_tag *tag);
320
321The tag will be retrieved by FS-Cache when it calls the object definition
322operation select_cache().
323
324
325==================
326INDEX REGISTRATION
327==================
328
329The third step is to inform FS-Cache about part of an index hierarchy that can
330be used to locate files. This is done by requesting a cookie for each index in
331the path to the file:
332
333 struct fscache_cookie *
334 fscache_acquire_cookie(struct fscache_cookie *parent,
335 const struct fscache_object_def *def,
336 void *netfs_data);
337
338This function creates an index entry in the index represented by parent,
339filling in the index entry by calling the operations pointed to by def.
340
341Note that this function never returns an error - all errors are handled
342internally. It may, however, return NULL to indicate no cookie. It is quite
343acceptable to pass this token back to this function as the parent to another
344acquisition (or even to the relinquish cookie, read page and write page
345functions - see below).
346
347Note also that no indices are actually created in a cache until a non-index
348object needs to be created somewhere down the hierarchy. Furthermore, an index
349may be created in several different caches independently at different times.
350This is all handled transparently, and the netfs doesn't see any of it.
351
352For example, with AFS, a cell would be added to the primary index. This index
353entry would have a dependent inode containing a volume location index for the
354volume mappings within this cell:
355
356 cell->cache =
357 fscache_acquire_cookie(afs_cache_netfs.primary_index,
358 &afs_cell_cache_index_def,
359 cell);
360
361Then when a volume location was accessed, it would be entered into the cell's
362index and an inode would be allocated that acts as a volume type and hash chain
363combination:
364
365 vlocation->cache =
366 fscache_acquire_cookie(cell->cache,
367 &afs_vlocation_cache_index_def,
368 vlocation);
369
370And then a particular flavour of volume (R/O for example) could be added to
371that index, creating another index for vnodes (AFS inode equivalents):
372
373 volume->cache =
374 fscache_acquire_cookie(vlocation->cache,
375 &afs_volume_cache_index_def,
376 volume);
377
378
379======================
380DATA FILE REGISTRATION
381======================
382
383The fourth step is to request a data file be created in the cache. This is
384identical to index cookie acquisition. The only difference is that the type in
385the object definition should be something other than index type.
386
387 vnode->cache =
388 fscache_acquire_cookie(volume->cache,
389 &afs_vnode_cache_object_def,
390 vnode);
391
392
393=================================
394MISCELLANEOUS OBJECT REGISTRATION
395=================================
396
397An optional step is to request an object of miscellaneous type be created in
398the cache. This is almost identical to index cookie acquisition. The only
399difference is that the type in the object definition should be something other
400than index type. Whilst the parent object could be an index, it's more likely
401it would be some other type of object such as a data file.
402
403 xattr->cache =
404 fscache_acquire_cookie(vnode->cache,
405 &afs_xattr_cache_object_def,
406 xattr);
407
408Miscellaneous objects might be used to store extended attributes or directory
409entries for example.
410
411
412==========================
413SETTING THE DATA FILE SIZE
414==========================
415
416The fifth step is to set the physical attributes of the file, such as its size.
417This doesn't automatically reserve any space in the cache, but permits the
418cache to adjust its metadata for data tracking appropriately:
419
420 int fscache_attr_changed(struct fscache_cookie *cookie);
421
422The cache will return -ENOBUFS if there is no backing cache or if there is no
423space to allocate any extra metadata required in the cache. The attributes
424will be accessed with the get_attr() cookie definition operation.
425
426Note that attempts to read or write data pages in the cache over this size may
427be rebuffed with -ENOBUFS.
428
429This operation schedules an attribute adjustment to happen asynchronously at
430some point in the future, and as such, it may happen after the function returns
431to the caller. The attribute adjustment excludes read and write operations.
432
433
434=====================
435PAGE READ/ALLOC/WRITE
436=====================
437
438And the sixth step is to store and retrieve pages in the cache. There are
439three functions that are used to do this.
440
441Note:
442
443 (1) A page should not be re-read or re-allocated without uncaching it first.
444
445 (2) A read or allocated page must be uncached when the netfs page is released
446 from the pagecache.
447
448 (3) A page should only be written to the cache if previous read or allocated.
449
450This permits the cache to maintain its page tracking in proper order.
451
452
453PAGE READ
454---------
455
456Firstly, the netfs should ask FS-Cache to examine the caches and read the
457contents cached for a particular page of a particular file if present, or else
458allocate space to store the contents if not:
459
460 typedef
461 void (*fscache_rw_complete_t)(struct page *page,
462 void *context,
463 int error);
464
465 int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
466 struct page *page,
467 fscache_rw_complete_t end_io_func,
468 void *context,
469 gfp_t gfp);
470
471The cookie argument must specify a cookie for an object that isn't an index,
472the page specified will have the data loaded into it (and is also used to
473specify the page number), and the gfp argument is used to control how any
474memory allocations made are satisfied.
475
476If the cookie indicates the inode is not cached:
477
478 (1) The function will return -ENOBUFS.
479
480Else if there's a copy of the page resident in the cache:
481
482 (1) The mark_pages_cached() cookie operation will be called on that page.
483
484 (2) The function will submit a request to read the data from the cache's
485 backing device directly into the page specified.
486
487 (3) The function will return 0.
488
489 (4) When the read is complete, end_io_func() will be invoked with:
490
491 (*) The netfs data supplied when the cookie was created.
492
493 (*) The page descriptor.
494
495 (*) The context argument passed to the above function. This will be
496 maintained with the get_context/put_context functions mentioned above.
497
498 (*) An argument that's 0 on success or negative for an error code.
499
500 If an error occurs, it should be assumed that the page contains no usable
501 data.
502
503 end_io_func() will be called in process context if the read is results in
504 an error, but it might be called in interrupt context if the read is
505 successful.
506
507Otherwise, if there's not a copy available in cache, but the cache may be able
508to store the page:
509
510 (1) The mark_pages_cached() cookie operation will be called on that page.
511
512 (2) A block may be reserved in the cache and attached to the object at the
513 appropriate place.
514
515 (3) The function will return -ENODATA.
516
517This function may also return -ENOMEM or -EINTR, in which case it won't have
518read any data from the cache.
519
520
521PAGE ALLOCATE
522-------------
523
524Alternatively, if there's not expected to be any data in the cache for a page
525because the file has been extended, a block can simply be allocated instead:
526
527 int fscache_alloc_page(struct fscache_cookie *cookie,
528 struct page *page,
529 gfp_t gfp);
530
531This is similar to the fscache_read_or_alloc_page() function, except that it
532never reads from the cache. It will return 0 if a block has been allocated,
533rather than -ENODATA as the other would. One or the other must be performed
534before writing to the cache.
535
536The mark_pages_cached() cookie operation will be called on the page if
537successful.
538
539
540PAGE WRITE
541----------
542
543Secondly, if the netfs changes the contents of the page (either due to an
544initial download or if a user performs a write), then the page should be
545written back to the cache:
546
547 int fscache_write_page(struct fscache_cookie *cookie,
548 struct page *page,
549 gfp_t gfp);
550
551The cookie argument must specify a data file cookie, the page specified should
552contain the data to be written (and is also used to specify the page number),
553and the gfp argument is used to control how any memory allocations made are
554satisfied.
555
556The page must have first been read or allocated successfully and must not have
557been uncached before writing is performed.
558
559If the cookie indicates the inode is not cached then:
560
561 (1) The function will return -ENOBUFS.
562
563Else if space can be allocated in the cache to hold this page:
564
565 (1) PG_fscache_write will be set on the page.
566
567 (2) The function will submit a request to write the data to cache's backing
568 device directly from the page specified.
569
570 (3) The function will return 0.
571
572 (4) When the write is complete PG_fscache_write is cleared on the page and
573 anyone waiting for that bit will be woken up.
574
575Else if there's no space available in the cache, -ENOBUFS will be returned. It
576is also possible for the PG_fscache_write bit to be cleared when no write took
577place if unforeseen circumstances arose (such as a disk error).
578
579Writing takes place asynchronously.
580
581
582MULTIPLE PAGE READ
583------------------
584
585A facility is provided to read several pages at once, as requested by the
586readpages() address space operation:
587
588 int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
589 struct address_space *mapping,
590 struct list_head *pages,
591 int *nr_pages,
592 fscache_rw_complete_t end_io_func,
593 void *context,
594 gfp_t gfp);
595
596This works in a similar way to fscache_read_or_alloc_page(), except:
597
598 (1) Any page it can retrieve data for is removed from pages and nr_pages and
599 dispatched for reading to the disk. Reads of adjacent pages on disk may
600 be merged for greater efficiency.
601
602 (2) The mark_pages_cached() cookie operation will be called on several pages
603 at once if they're being read or allocated.
604
605 (3) If there was an general error, then that error will be returned.
606
607 Else if some pages couldn't be allocated or read, then -ENOBUFS will be
608 returned.
609
610 Else if some pages couldn't be read but were allocated, then -ENODATA will
611 be returned.
612
613 Otherwise, if all pages had reads dispatched, then 0 will be returned, the
614 list will be empty and *nr_pages will be 0.
615
616 (4) end_io_func will be called once for each page being read as the reads
617 complete. It will be called in process context if error != 0, but it may
618 be called in interrupt context if there is no error.
619
620Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude
621some of the pages being read and some being allocated. Those pages will have
622been marked appropriately and will need uncaching.
623
624
625==============
626PAGE UNCACHING
627==============
628
629To uncache a page, this function should be called:
630
631 void fscache_uncache_page(struct fscache_cookie *cookie,
632 struct page *page);
633
634This function permits the cache to release any in-memory representation it
635might be holding for this netfs page. This function must be called once for
636each page on which the read or write page functions above have been called to
637make sure the cache's in-memory tracking information gets torn down.
638
639Note that pages can't be explicitly deleted from the a data file. The whole
640data file must be retired (see the relinquish cookie function below).
641
642Furthermore, note that this does not cancel the asynchronous read or write
643operation started by the read/alloc and write functions, so the page
644invalidation and release functions must use:
645
646 bool fscache_check_page_write(struct fscache_cookie *cookie,
647 struct page *page);
648
649to see if a page is being written to the cache, and:
650
651 void fscache_wait_on_page_write(struct fscache_cookie *cookie,
652 struct page *page);
653
654to wait for it to finish if it is.
655
656
657==========================
658INDEX AND DATA FILE UPDATE
659==========================
660
661To request an update of the index data for an index or other object, the
662following function should be called:
663
664 void fscache_update_cookie(struct fscache_cookie *cookie);
665
666This function will refer back to the netfs_data pointer stored in the cookie by
667the acquisition function to obtain the data to write into each revised index
668entry. The update method in the parent index definition will be called to
669transfer the data.
670
671Note that partial updates may happen automatically at other times, such as when
672data blocks are added to a data file object.
673
674
675===============================
676MISCELLANEOUS COOKIE OPERATIONS
677===============================
678
679There are a number of operations that can be used to control cookies:
680
681 (*) Cookie pinning:
682
683 int fscache_pin_cookie(struct fscache_cookie *cookie);
684 void fscache_unpin_cookie(struct fscache_cookie *cookie);
685
686 These operations permit data cookies to be pinned into the cache and to
687 have the pinning removed. They are not permitted on index cookies.
688
689 The pinning function will return 0 if successful, -ENOBUFS in the cookie
690 isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning,
691 -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
692 -EIO if there's any other problem.
693
694 (*) Data space reservation:
695
696 int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size);
697
698 This permits a netfs to request cache space be reserved to store up to the
699 given amount of a file. It is permitted to ask for more than the current
700 size of the file to allow for future file expansion.
701
702 If size is given as zero then the reservation will be cancelled.
703
704 The function will return 0 if successful, -ENOBUFS in the cookie isn't
705 backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations,
706 -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
707 -EIO if there's any other problem.
708
709 Note that this doesn't pin an object in a cache; it can still be culled to
710 make space if it's not in use.
711
712
713=====================
714COOKIE UNREGISTRATION
715=====================
716
717To get rid of a cookie, this function should be called.
718
719 void fscache_relinquish_cookie(struct fscache_cookie *cookie,
720 int retire);
721
722If retire is non-zero, then the object will be marked for recycling, and all
723copies of it will be removed from all active caches in which it is present.
724Not only that but all child objects will also be retired.
725
726If retire is zero, then the object may be available again when next the
727acquisition function is called. Retirement here will overrule the pinning on a
728cookie.
729
730One very important note - relinquish must NOT be called for a cookie unless all
731the cookies for "child" indices, objects and pages have been relinquished
732first.
733
734
735================================
736INDEX AND DATA FILE INVALIDATION
737================================
738
739There is no direct way to invalidate an index subtree or a data file. To do
740this, the caller should relinquish and retire the cookie they have, and then
741acquire a new one.
742
743
744===========================
745FS-CACHE SPECIFIC PAGE FLAG
746===========================
747
748FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is
749given the alternative name PG_fscache.
750
751PG_fscache is used to indicate that the page is known by the cache, and that
752the cache must be informed if the page is going to go away. It's an indication
753to the netfs that the cache has an interest in this page, where an interest may
754be a pointer to it, resources allocated or reserved for it, or I/O in progress
755upon it.
756
757The netfs can use this information in methods such as releasepage() to
758determine whether it needs to uncache a page or update it.
759
760Furthermore, if this bit is set, releasepage() and invalidatepage() operations
761will be called on a page to get rid of it, even if PG_private is not set. This
762allows caching to attempted on a page before read_cache_pages() to be called
763after fscache_read_or_alloc_pages() as the former will try and release pages it
764was given under certain circumstances.
765
766This bit does not overlap with such as PG_private. This means that FS-Cache
767can be used with a filesystem that uses the block buffering code.
768
769There are a number of operations defined on this flag:
770
771 int PageFsCache(struct page *page);
772 void SetPageFsCache(struct page *page)
773 void ClearPageFsCache(struct page *page)
774 int TestSetPageFsCache(struct page *page)
775 int TestClearPageFsCache(struct page *page)
776
777These functions are bit test, bit set, bit clear, bit test and set and bit
778test and clear operations on PG_fscache.
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
new file mode 100644
index 000000000000..e8b0a35d8fe5
--- /dev/null
+++ b/Documentation/filesystems/caching/object.txt
@@ -0,0 +1,313 @@
1 ====================================================
2 IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT
3 ====================================================
4
5By: David Howells <dhowells@redhat.com>
6
7Contents:
8
9 (*) Representation
10
11 (*) Object management state machine.
12
13 - Provision of cpu time.
14 - Locking simplification.
15
16 (*) The set of states.
17
18 (*) The set of events.
19
20
21==============
22REPRESENTATION
23==============
24
25FS-Cache maintains an in-kernel representation of each object that a netfs is
26currently interested in. Such objects are represented by the fscache_cookie
27struct and are referred to as cookies.
28
29FS-Cache also maintains a separate in-kernel representation of the objects that
30a cache backend is currently actively caching. Such objects are represented by
31the fscache_object struct. The cache backends allocate these upon request, and
32are expected to embed them in their own representations. These are referred to
33as objects.
34
35There is a 1:N relationship between cookies and objects. A cookie may be
36represented by multiple objects - an index may exist in more than one cache -
37or even by no objects (it may not be cached).
38
39Furthermore, both cookies and objects are hierarchical. The two hierarchies
40correspond, but the cookies tree is a superset of the union of the object trees
41of multiple caches:
42
43 NETFS INDEX TREE : CACHE 1 : CACHE 2
44 : :
45 : +-----------+ :
46 +----------->| IObject | :
47 +-----------+ | : +-----------+ :
48 | ICookie |-------+ : | :
49 +-----------+ | : | : +-----------+
50 | +------------------------------>| IObject |
51 | : | : +-----------+
52 | : V : |
53 | : +-----------+ : |
54 V +----------->| IObject | : |
55 +-----------+ | : +-----------+ : |
56 | ICookie |-------+ : | : V
57 +-----------+ | : | : +-----------+
58 | +------------------------------>| IObject |
59 +-----+-----+ : | : +-----------+
60 | | : | : |
61 V | : V : |
62 +-----------+ | : +-----------+ : |
63 | ICookie |------------------------->| IObject | : |
64 +-----------+ | : +-----------+ : |
65 | V : | : V
66 | +-----------+ : | : +-----------+
67 | | ICookie |-------------------------------->| IObject |
68 | +-----------+ : | : +-----------+
69 V | : V : |
70 +-----------+ | : +-----------+ : |
71 | DCookie |------------------------->| DObject | : |
72 +-----------+ | : +-----------+ : |
73 | : : |
74 +-------+-------+ : : |
75 | | : : |
76 V V : : V
77 +-----------+ +-----------+ : : +-----------+
78 | DCookie | | DCookie |------------------------>| DObject |
79 +-----------+ +-----------+ : : +-----------+
80 : :
81
82In the above illustration, ICookie and IObject represent indices and DCookie
83and DObject represent data storage objects. Indices may have representation in
84multiple caches, but currently, non-index objects may not. Objects of any type
85may also be entirely unrepresented.
86
87As far as the netfs API goes, the netfs is only actually permitted to see
88pointers to the cookies. The cookies themselves and any objects attached to
89those cookies are hidden from it.
90
91
92===============================
93OBJECT MANAGEMENT STATE MACHINE
94===============================
95
96Within FS-Cache, each active object is managed by its own individual state
97machine. The state for an object is kept in the fscache_object struct, in
98object->state. A cookie may point to a set of objects that are in different
99states.
100
101Each state has an action associated with it that is invoked when the machine
102wakes up in that state. There are four logical sets of states:
103
104 (1) Preparation: states that wait for the parent objects to become ready. The
105 representations are hierarchical, and it is expected that an object must
106 be created or accessed with respect to its parent object.
107
108 (2) Initialisation: states that perform lookups in the cache and validate
109 what's found and that create on disk any missing metadata.
110
111 (3) Normal running: states that allow netfs operations on objects to proceed
112 and that update the state of objects.
113
114 (4) Termination: states that detach objects from their netfs cookies, that
115 delete objects from disk, that handle disk and system errors and that free
116 up in-memory resources.
117
118
119In most cases, transitioning between states is in response to signalled events.
120When a state has finished processing, it will usually set the mask of events in
121which it is interested (object->event_mask) and relinquish the worker thread.
122Then when an event is raised (by calling fscache_raise_event()), if the event
123is not masked, the object will be queued for processing (by calling
124fscache_enqueue_object()).
125
126
127PROVISION OF CPU TIME
128---------------------
129
130The work to be done by the various states is given CPU time by the threads of
131the slow work facility (see Documentation/slow-work.txt). This is used in
132preference to the workqueue facility because:
133
134 (1) Threads may be completely occupied for very long periods of time by a
135 particular work item. These state actions may be doing sequences of
136 synchronous, journalled disk accesses (lookup, mkdir, create, setxattr,
137 getxattr, truncate, unlink, rmdir, rename).
138
139 (2) Threads may do little actual work, but may rather spend a lot of time
140 sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded
141 workqueues don't necessarily have the right numbers of threads.
142
143
144LOCKING SIMPLIFICATION
145----------------------
146
147Because only one worker thread may be operating on any particular object's
148state machine at once, this simplifies the locking, particularly with respect
149to disconnecting the netfs's representation of a cache object (fscache_cookie)
150from the cache backend's representation (fscache_object) - which may be
151requested from either end.
152
153
154=================
155THE SET OF STATES
156=================
157
158The object state machine has a set of states that it can be in. There are
159preparation states in which the object sets itself up and waits for its parent
160object to transit to a state that allows access to its children:
161
162 (1) State FSCACHE_OBJECT_INIT.
163
164 Initialise the object and wait for the parent object to become active. In
165 the cache, it is expected that it will not be possible to look an object
166 up from the parent object, until that parent object itself has been looked
167 up.
168
169There are initialisation states in which the object sets itself up and accesses
170disk for the object metadata:
171
172 (2) State FSCACHE_OBJECT_LOOKING_UP.
173
174 Look up the object on disk, using the parent as a starting point.
175 FS-Cache expects the cache backend to probe the cache to see whether this
176 object is represented there, and if it is, to see if it's valid (coherency
177 management).
178
179 The cache should call fscache_object_lookup_negative() to indicate lookup
180 failure for whatever reason, and should call fscache_obtained_object() to
181 indicate success.
182
183 At the completion of lookup, FS-Cache will let the netfs go ahead with
184 read operations, no matter whether the file is yet cached. If not yet
185 cached, read operations will be immediately rejected with ENODATA until
186 the first known page is uncached - as to that point there can be no data
187 to be read out of the cache for that file that isn't currently also held
188 in the pagecache.
189
190 (3) State FSCACHE_OBJECT_CREATING.
191
192 Create an object on disk, using the parent as a starting point. This
193 happens if the lookup failed to find the object, or if the object's
194 coherency data indicated what's on disk is out of date. In this state,
195 FS-Cache expects the cache to create
196
197 The cache should call fscache_obtained_object() if creation completes
198 successfully, fscache_object_lookup_negative() otherwise.
199
200 At the completion of creation, FS-Cache will start processing write
201 operations the netfs has queued for an object. If creation failed, the
202 write ops will be transparently discarded, and nothing recorded in the
203 cache.
204
205There are some normal running states in which the object spends its time
206servicing netfs requests:
207
208 (4) State FSCACHE_OBJECT_AVAILABLE.
209
210 A transient state in which pending operations are started, child objects
211 are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary
212 lookup data is freed.
213
214 (5) State FSCACHE_OBJECT_ACTIVE.
215
216 The normal running state. In this state, requests the netfs makes will be
217 passed on to the cache.
218
219 (6) State FSCACHE_OBJECT_UPDATING.
220
221 The state machine comes here to update the object in the cache from the
222 netfs's records. This involves updating the auxiliary data that is used
223 to maintain coherency.
224
225And there are terminal states in which an object cleans itself up, deallocates
226memory and potentially deletes stuff from disk:
227
228 (7) State FSCACHE_OBJECT_LC_DYING.
229
230 The object comes here if it is dying because of a lookup or creation
231 error. This would be due to a disk error or system error of some sort.
232 Temporary data is cleaned up, and the parent is released.
233
234 (8) State FSCACHE_OBJECT_DYING.
235
236 The object comes here if it is dying due to an error, because its parent
237 cookie has been relinquished by the netfs or because the cache is being
238 withdrawn.
239
240 Any child objects waiting on this one are given CPU time so that they too
241 can destroy themselves. This object waits for all its children to go away
242 before advancing to the next state.
243
244 (9) State FSCACHE_OBJECT_ABORT_INIT.
245
246 The object comes to this state if it was waiting on its parent in
247 FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself
248 so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
249
250(10) State FSCACHE_OBJECT_RELEASING.
251(11) State FSCACHE_OBJECT_RECYCLING.
252
253 The object comes to one of these two states when dying once it is rid of
254 all its children, if it is dying because the netfs relinquished its
255 cookie. In the first state, the cached data is expected to persist, and
256 in the second it will be deleted.
257
258(12) State FSCACHE_OBJECT_WITHDRAWING.
259
260 The object transits to this state if the cache decides it wants to
261 withdraw the object from service, perhaps to make space, but also due to
262 error or just because the whole cache is being withdrawn.
263
264(13) State FSCACHE_OBJECT_DEAD.
265
266 The object transits to this state when the in-memory object record is
267 ready to be deleted. The object processor shouldn't ever see an object in
268 this state.
269
270
271THE SET OF EVENTS
272-----------------
273
274There are a number of events that can be raised to an object state machine:
275
276 (*) FSCACHE_OBJECT_EV_UPDATE
277
278 The netfs requested that an object be updated. The state machine will ask
279 the cache backend to update the object, and the cache backend will ask the
280 netfs for details of the change through its cookie definition ops.
281
282 (*) FSCACHE_OBJECT_EV_CLEARED
283
284 This is signalled in two circumstances:
285
286 (a) when an object's last child object is dropped and
287
288 (b) when the last operation outstanding on an object is completed.
289
290 This is used to proceed from the dying state.
291
292 (*) FSCACHE_OBJECT_EV_ERROR
293
294 This is signalled when an I/O error occurs during the processing of some
295 object.
296
297 (*) FSCACHE_OBJECT_EV_RELEASE
298 (*) FSCACHE_OBJECT_EV_RETIRE
299
300 These are signalled when the netfs relinquishes a cookie it was using.
301 The event selected depends on whether the netfs asks for the backing
302 object to be retired (deleted) or retained.
303
304 (*) FSCACHE_OBJECT_EV_WITHDRAW
305
306 This is signalled when the cache backend wants to withdraw an object.
307 This means that the object will have to be detached from the netfs's
308 cookie.
309
310Because the withdrawing releasing/retiring events are all handled by the object
311state machine, it doesn't matter if there's a collision with both ends trying
312to sever the connection at the same time. The state machine can just pick
313which one it wants to honour, and that effects the other.
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
new file mode 100644
index 000000000000..b6b070c57cbf
--- /dev/null
+++ b/Documentation/filesystems/caching/operations.txt
@@ -0,0 +1,213 @@
1 ================================
2 ASYNCHRONOUS OPERATIONS HANDLING
3 ================================
4
5By: David Howells <dhowells@redhat.com>
6
7Contents:
8
9 (*) Overview.
10
11 (*) Operation record initialisation.
12
13 (*) Parameters.
14
15 (*) Procedure.
16
17 (*) Asynchronous callback.
18
19
20========
21OVERVIEW
22========
23
24FS-Cache has an asynchronous operations handling facility that it uses for its
25data storage and retrieval routines. Its operations are represented by
26fscache_operation structs, though these are usually embedded into some other
27structure.
28
29This facility is available to and expected to be be used by the cache backends,
30and FS-Cache will create operations and pass them off to the appropriate cache
31backend for completion.
32
33To make use of this facility, <linux/fscache-cache.h> should be #included.
34
35
36===============================
37OPERATION RECORD INITIALISATION
38===============================
39
40An operation is recorded in an fscache_operation struct:
41
42 struct fscache_operation {
43 union {
44 struct work_struct fast_work;
45 struct slow_work slow_work;
46 };
47 unsigned long flags;
48 fscache_operation_processor_t processor;
49 ...
50 };
51
52Someone wanting to issue an operation should allocate something with this
53struct embedded in it. They should initialise it by calling:
54
55 void fscache_operation_init(struct fscache_operation *op,
56 fscache_operation_release_t release);
57
58with the operation to be initialised and the release function to use.
59
60The op->flags parameter should be set to indicate the CPU time provision and
61the exclusivity (see the Parameters section).
62
63The op->fast_work, op->slow_work and op->processor flags should be set as
64appropriate for the CPU time provision (see the Parameters section).
65
66FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the
67operation and waited for afterwards.
68
69
70==========
71PARAMETERS
72==========
73
74There are a number of parameters that can be set in the operation record's flag
75parameter. There are three options for the provision of CPU time in these
76operations:
77
78 (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread
79 may decide it wants to handle an operation itself without deferring it to
80 another thread.
81
82 This is, for example, used in read operations for calling readpages() on
83 the backing filesystem in CacheFiles. Although readpages() does an
84 asynchronous data fetch, the determination of whether pages exist is done
85 synchronously - and the netfs does not proceed until this has been
86 determined.
87
88 If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags
89 before submitting the operation, and the operating thread must wait for it
90 to be cleared before proceeding:
91
92 wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
93 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
94
95
96 (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it
97 will be given to keventd to process. Such an operation is not permitted
98 to sleep on I/O.
99
100 This is, for example, used by CacheFiles to copy data from a backing fs
101 page to a netfs page after the backing fs has read the page in.
102
103 If this option is used, op->fast_work and op->processor must be
104 initialised before submitting the operation:
105
106 INIT_WORK(&op->fast_work, do_some_work);
107
108
109 (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it
110 will be given to the slow work facility to process. Such an operation is
111 permitted to sleep on I/O.
112
113 This is, for example, used by FS-Cache to handle background writes of
114 pages that have just been fetched from a remote server.
115
116 If this option is used, op->slow_work and op->processor must be
117 initialised before submitting the operation:
118
119 fscache_operation_init_slow(op, processor)
120
121
122Furthermore, operations may be one of two types:
123
124 (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in
125 conjunction with any other operation on the object being operated upon.
126
127 An example of this is the attribute change operation, in which the file
128 being written to may need truncation.
129
130 (2) Shareable. Operations of this type may be running simultaneously. It's
131 up to the operation implementation to prevent interference between other
132 operations running at the same time.
133
134
135=========
136PROCEDURE
137=========
138
139Operations are used through the following procedure:
140
141 (1) The submitting thread must allocate the operation and initialise it
142 itself. Normally this would be part of a more specific structure with the
143 generic op embedded within.
144
145 (2) The submitting thread must then submit the operation for processing using
146 one of the following two functions:
147
148 int fscache_submit_op(struct fscache_object *object,
149 struct fscache_operation *op);
150
151 int fscache_submit_exclusive_op(struct fscache_object *object,
152 struct fscache_operation *op);
153
154 The first function should be used to submit non-exclusive ops and the
155 second to submit exclusive ones. The caller must still set the
156 FSCACHE_OP_EXCLUSIVE flag.
157
158 If successful, both functions will assign the operation to the specified
159 object and return 0. -ENOBUFS will be returned if the object specified is
160 permanently unavailable.
161
162 The operation manager will defer operations on an object that is still
163 undergoing lookup or creation. The operation will also be deferred if an
164 operation of conflicting exclusivity is in progress on the object.
165
166 If the operation is asynchronous, the manager will retain a reference to
167 it, so the caller should put their reference to it by passing it to:
168
169 void fscache_put_operation(struct fscache_operation *op);
170
171 (3) If the submitting thread wants to do the work itself, and has marked the
172 operation with FSCACHE_OP_MYTHREAD, then it should monitor
173 FSCACHE_OP_WAITING as described above and check the state of the object if
174 necessary (the object might have died whilst the thread was waiting).
175
176 When it has finished doing its processing, it should call
177 fscache_put_operation() on it.
178
179 (4) The operation holds an effective lock upon the object, preventing other
180 exclusive ops conflicting until it is released. The operation can be
181 enqueued for further immediate asynchronous processing by adjusting the
182 CPU time provisioning option if necessary, eg:
183
184 op->flags &= ~FSCACHE_OP_TYPE;
185 op->flags |= ~FSCACHE_OP_FAST;
186
187 and calling:
188
189 void fscache_enqueue_operation(struct fscache_operation *op)
190
191 This can be used to allow other things to have use of the worker thread
192 pools.
193
194
195=====================
196ASYNCHRONOUS CALLBACK
197=====================
198
199When used in asynchronous mode, the worker thread pool will invoke the
200processor method with a pointer to the operation. This should then get at the
201container struct by using container_of():
202
203 static void fscache_write_op(struct fscache_operation *_op)
204 {
205 struct fscache_storage *op =
206 container_of(_op, struct fscache_storage, op);
207 ...
208 }
209
210The caller holds a reference on the operation, and will invoke
211fscache_put_operation() when the processor function returns. The processor
212function is at liberty to call fscache_enqueue_operation() or to take extra
213references.
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
new file mode 100644
index 000000000000..ed52af60c2d8
--- /dev/null
+++ b/Documentation/filesystems/debugfs.txt
@@ -0,0 +1,158 @@
1Copyright 2009 Jonathan Corbet <corbet@lwn.net>
2
3Debugfs exists as a simple way for kernel developers to make information
4available to user space. Unlike /proc, which is only meant for information
5about a process, or sysfs, which has strict one-value-per-file rules,
6debugfs has no rules at all. Developers can put any information they want
7there. The debugfs filesystem is also intended to not serve as a stable
8ABI to user space; in theory, there are no stability constraints placed on
9files exported there. The real world is not always so simple, though [1];
10even debugfs interfaces are best designed with the idea that they will need
11to be maintained forever.
12
13Debugfs is typically mounted with a command like:
14
15 mount -t debugfs none /sys/kernel/debug
16
17(Or an equivalent /etc/fstab line).
18
19Note that the debugfs API is exported GPL-only to modules.
20
21Code using debugfs should include <linux/debugfs.h>. Then, the first order
22of business will be to create at least one directory to hold a set of
23debugfs files:
24
25 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
26
27This call, if successful, will make a directory called name underneath the
28indicated parent directory. If parent is NULL, the directory will be
29created in the debugfs root. On success, the return value is a struct
30dentry pointer which can be used to create files in the directory (and to
31clean it up at the end). A NULL return value indicates that something went
32wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
33kernel has been built without debugfs support and none of the functions
34described below will work.
35
36The most general way to create a file within a debugfs directory is with:
37
38 struct dentry *debugfs_create_file(const char *name, mode_t mode,
39 struct dentry *parent, void *data,
40 const struct file_operations *fops);
41
42Here, name is the name of the file to create, mode describes the access
43permissions the file should have, parent indicates the directory which
44should hold the file, data will be stored in the i_private field of the
45resulting inode structure, and fops is a set of file operations which
46implement the file's behavior. At a minimum, the read() and/or write()
47operations should be provided; others can be included as needed. Again,
48the return value will be a dentry pointer to the created file, NULL for
49error, or ERR_PTR(-ENODEV) if debugfs support is missing.
50
51In a number of cases, the creation of a set of file operations is not
52actually necessary; the debugfs code provides a number of helper functions
53for simple situations. Files containing a single integer value can be
54created with any of:
55
56 struct dentry *debugfs_create_u8(const char *name, mode_t mode,
57 struct dentry *parent, u8 *value);
58 struct dentry *debugfs_create_u16(const char *name, mode_t mode,
59 struct dentry *parent, u16 *value);
60 struct dentry *debugfs_create_u32(const char *name, mode_t mode,
61 struct dentry *parent, u32 *value);
62 struct dentry *debugfs_create_u64(const char *name, mode_t mode,
63 struct dentry *parent, u64 *value);
64
65These files support both reading and writing the given value; if a specific
66file should not be written to, simply set the mode bits accordingly. The
67values in these files are in decimal; if hexadecimal is more appropriate,
68the following functions can be used instead:
69
70 struct dentry *debugfs_create_x8(const char *name, mode_t mode,
71 struct dentry *parent, u8 *value);
72 struct dentry *debugfs_create_x16(const char *name, mode_t mode,
73 struct dentry *parent, u16 *value);
74 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
75 struct dentry *parent, u32 *value);
76
77Note that there is no debugfs_create_x64().
78
79These functions are useful as long as the developer knows the size of the
80value to be exported. Some types can have different widths on different
81architectures, though, complicating the situation somewhat. There is a
82function meant to help out in one special case:
83
84 struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
85 struct dentry *parent,
86 size_t *value);
87
88As might be expected, this function will create a debugfs file to represent
89a variable of type size_t.
90
91Boolean values can be placed in debugfs with:
92
93 struct dentry *debugfs_create_bool(const char *name, mode_t mode,
94 struct dentry *parent, u32 *value);
95
96A read on the resulting file will yield either Y (for non-zero values) or
97N, followed by a newline. If written to, it will accept either upper- or
98lower-case values, or 1 or 0. Any other input will be silently ignored.
99
100Finally, a block of arbitrary binary data can be exported with:
101
102 struct debugfs_blob_wrapper {
103 void *data;
104 unsigned long size;
105 };
106
107 struct dentry *debugfs_create_blob(const char *name, mode_t mode,
108 struct dentry *parent,
109 struct debugfs_blob_wrapper *blob);
110
111A read of this file will return the data pointed to by the
112debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way
113to return several lines of (static) formatted text output. This function
114can be used to export binary information, but there does not appear to be
115any code which does so in the mainline. Note that all files created with
116debugfs_create_blob() are read-only.
117
118There are a couple of other directory-oriented helper functions:
119
120 struct dentry *debugfs_rename(struct dentry *old_dir,
121 struct dentry *old_dentry,
122 struct dentry *new_dir,
123 const char *new_name);
124
125 struct dentry *debugfs_create_symlink(const char *name,
126 struct dentry *parent,
127 const char *target);
128
129A call to debugfs_rename() will give a new name to an existing debugfs
130file, possibly in a different directory. The new_name must not exist prior
131to the call; the return value is old_dentry with updated information.
132Symbolic links can be created with debugfs_create_symlink().
133
134There is one important thing that all debugfs users must take into account:
135there is no automatic cleanup of any directories created in debugfs. If a
136module is unloaded without explicitly removing debugfs entries, the result
137will be a lot of stale pointers and no end of highly antisocial behavior.
138So all debugfs users - at least those which can be built as modules - must
139be prepared to remove all files and directories they create there. A file
140can be removed with:
141
142 void debugfs_remove(struct dentry *dentry);
143
144The dentry value can be NULL, in which case nothing will be removed.
145
146Once upon a time, debugfs users were required to remember the dentry
147pointer for every debugfs file they created so that all files could be
148cleaned up. We live in more civilized times now, though, and debugfs users
149can call:
150
151 void debugfs_remove_recursive(struct dentry *dentry);
152
153If this function is passed a pointer for the dentry corresponding to the
154top-level directory, the entire hierarchy below that directory will be
155removed.
156
157Notes:
158 [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt
new file mode 100644
index 000000000000..0ced74c2f73c
--- /dev/null
+++ b/Documentation/filesystems/exofs.txt
@@ -0,0 +1,176 @@
1===============================================================================
2WHAT IS EXOFS?
3===============================================================================
4
5exofs is a file system that uses an OSD and exports the API of a normal Linux
6file system. Users access exofs like any other local file system, and exofs
7will in turn issue commands to the local OSD initiator.
8
9OSD is a new T10 command set that views storage devices not as a large/flat
10array of sectors but as a container of objects, each having a length, quota,
11time attributes and more. Each object is addressed by a 64bit ID, and is
12contained in a 64bit ID partition. Each object has associated attributes
13attached to it, which are integral part of the object and provide metadata about
14the object. The standard defines some common obligatory attributes, but user
15attributes can be added as needed.
16
17===============================================================================
18ENVIRONMENT
19===============================================================================
20
21To use this file system, you need to have an object store to run it on. You
22may download a target from:
23http://open-osd.org
24
25See Documentation/scsi/osd.txt for how to setup a working osd environment.
26
27===============================================================================
28USAGE
29===============================================================================
30
311. Download and compile exofs and open-osd initiator:
32 You need an external Kernel source tree or kernel headers from your
33 distribution. (anything based on 2.6.26 or later).
34
35 a. download open-osd including exofs source using:
36 [parent-directory]$ git clone git://git.open-osd.org/open-osd.git
37
38 b. Build the library module like this:
39 [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
40
41 This will build both the open-osd initiator as well as the exofs kernel
42 module. Use whatever parameters you compiled your Kernel with and
43 $(KER_DIR) above pointing to the Kernel you compile against. See the file
44 open-osd/top-level-Makefile for an example.
45
462. Get the OSD initiator and target set up properly, and login to the target.
47 See Documentation/scsi/osd.txt for farther instructions. Also see ./do-osd
48 for example script that does all these steps.
49
503. Insmod the exofs.ko module:
51 [exofs]$ insmod exofs.ko
52
534. Make sure the directory where you want to mount exists. If not, create it.
54 (For example, mkdir /mnt/exofs)
55
565. At first run you will need to invoke the mkfs.exofs application
57
58 As an example, this will create the file system on:
59 /dev/osd0 partition ID 65536
60
61 mkfs.exofs --pid=65536 --format /dev/osd0
62
63 The --format is optional if not specified no OSD_FORMAT will be
64 preformed and a clean file system will be created in the specified pid,
65 in the available space of the target. (Use --format=size_in_meg to limit
66 the total LUN space available)
67
68 If pid already exist it will be deleted and a new one will be created in it's
69 place. Be careful.
70
71 An exofs lives inside a single OSD partition. You can create multiple exofs
72 filesystems on the same device using multiple pids.
73
74 (run mkfs.exofs without any parameters for usage help message)
75
766. Mount the file system.
77
78 For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
79
80 mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
81
827. For reference (See do-exofs example script):
83 do-exofs start - an example of how to perform the above steps.
84 do-exofs stop - an example of how to unmount the file system.
85 do-exofs format - an example of how to format and mkfs a new exofs.
86
878. Extra compilation flags (uncomment in fs/exofs/Kbuild):
88 CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
89
90===============================================================================
91exofs mount options
92===============================================================================
93Similar to any mount command:
94 mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
95
96Where:
97 -t exofs: specifies the exofs file system
98
99 /dev/osdX: X is a decimal number. /dev/osdX was created after a successful
100 login into an OSD target.
101
102 mount_exofs_directory: The directory to mount the file system on
103
104 exofs specific options: Options are separated by commas (,)
105 pid=<integer> - The partition number to mount/create as
106 container of the filesystem.
107 This option is mandatory
108 to=<integer> - Timeout in ticks for a single command
109 default is (60 * HZ) [for debugging only]
110
111===============================================================================
112DESIGN
113===============================================================================
114
115* The file system control block (AKA on-disk superblock) resides in an object
116 with a special ID (defined in common.h).
117 Information included in the file system control block is used to fill the
118 in-memory superblock structure at mount time. This object is created before
119 the file system is used by mkexofs.c It contains information such as:
120 - The file system's magic number
121 - The next inode number to be allocated
122
123* Each file resides in its own object and contains the data (and it will be
124 possible to extend the file over multiple objects, though this has not been
125 implemented yet).
126
127* A directory is treated as a file, and essentially contains a list of <file
128 name, inode #> pairs for files that are found in that directory. The object
129 IDs correspond to the files' inode numbers and will be allocated according to
130 a bitmap (stored in a separate object). Now they are allocated using a
131 counter.
132
133* Each file's control block (AKA on-disk inode) is stored in its object's
134 attributes. This applies to both regular files and other types (directories,
135 device files, symlinks, etc.).
136
137* Credentials are generated per object (inode and superblock) when they is
138 created in memory (read off disk or created). The credential works for all
139 operations and is used as long as the object remains in memory.
140
141* Async OSD operations are used whenever possible, but the target may execute
142 them out of order. The operations that concern us are create, delete,
143 readpage, writepage, update_inode, and truncate. The following pairs of
144 operations should execute in the order written, and we need to prevent them
145 from executing in reverse order:
146 - The following are handled with the OBJ_CREATED and OBJ_2BCREATED
147 flags. OBJ_CREATED is set when we know the object exists on the OSD -
148 in create's callback function, and when we successfully do a read_inode.
149 OBJ_2BCREATED is set in the beginning of the create function, so we
150 know that we should wait.
151 - create/delete: delete should wait until the object is created
152 on the OSD.
153 - create/readpage: readpage should be able to return a page
154 full of zeroes in this case. If there was a write already
155 en-route (i.e. create, writepage, readpage) then the page
156 would be locked, and so it would really be the same as
157 create/writepage.
158 - create/writepage: if writepage is called for a sync write, it
159 should wait until the object is created on the OSD.
160 Otherwise, it should just return.
161 - create/truncate: truncate should wait until the object is
162 created on the OSD.
163 - create/update_inode: update_inode should wait until the
164 object is created on the OSD.
165 - Handled by VFS locks:
166 - readpage/delete: shouldn't happen because of page lock.
167 - writepage/delete: shouldn't happen because of page lock.
168 - readpage/writepage: shouldn't happen because of page lock.
169
170===============================================================================
171LICENSE/COPYRIGHT
172===============================================================================
173The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
174version 2.6.10). All files include the original copyrights, and the license
175is GPL version 2 (only version 2, as is true for the Linux kernel). The
176Linux kernel can be downloaded from www.kernel.org.
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
index e055acb6b2d4..67639f905f10 100644
--- a/Documentation/filesystems/ext2.txt
+++ b/Documentation/filesystems/ext2.txt
@@ -322,7 +322,7 @@ an upper limit on the block size imposed by the page size of the kernel,
322so 8kB blocks are only allowed on Alpha systems (and other architectures 322so 8kB blocks are only allowed on Alpha systems (and other architectures
323which support larger pages). 323which support larger pages).
324 324
325There is an upper limit of 32768 subdirectories in a single directory. 325There is an upper limit of 32000 subdirectories in a single directory.
326 326
327There is a "soft" upper limit of about 10-15k files in a single directory 327There is a "soft" upper limit of about 10-15k files in a single directory
328with the current linear linked-list directory implementation. This limit 328with the current linear linked-list directory implementation. This limit
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index e5f3833a6ef8..570f9bd9be2b 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -14,6 +14,11 @@ Options
14When mounting an ext3 filesystem, the following option are accepted: 14When mounting an ext3 filesystem, the following option are accepted:
15(*) == default 15(*) == default
16 16
17ro Mount filesystem read only. Note that ext3 will replay
18 the journal (and thus write to the partition) even when
19 mounted "read only". Mount options "ro,noload" can be
20 used to prevent writes to the filesystem.
21
17journal=update Update the ext3 file system's journal to the current 22journal=update Update the ext3 file system's journal to the current
18 format. 23 format.
19 24
@@ -27,7 +32,9 @@ journal_dev=devnum When the external journal device's major/minor numbers
27 identified through its new major/minor numbers encoded 32 identified through its new major/minor numbers encoded
28 in devnum. 33 in devnum.
29 34
30noload Don't load the journal on mounting. 35noload Don't load the journal on mounting. Note that this forces
36 mount of inconsistent filesystem, which can lead to
37 various problems.
31 38
32data=journal All data are committed into the journal prior to being 39data=journal All data are committed into the journal prior to being
33 written into the main file system. 40 written into the main file system.
@@ -92,9 +99,12 @@ nocheck
92 99
93debug Extra debugging information is sent to syslog. 100debug Extra debugging information is sent to syslog.
94 101
95errors=remount-ro(*) Remount the filesystem read-only on an error. 102errors=remount-ro Remount the filesystem read-only on an error.
96errors=continue Keep going on a filesystem error. 103errors=continue Keep going on a filesystem error.
97errors=panic Panic and halt the machine if an error occurs. 104errors=panic Panic and halt the machine if an error occurs.
105 (These mount options override the errors behavior
106 specified in the superblock, which can be
107 configured using tune2fs.)
98 108
99data_err=ignore(*) Just print an error message if an error occurs 109data_err=ignore(*) Just print an error message if an error occurs
100 in a file data buffer in ordered mode. 110 in a file data buffer in ordered mode.
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index cec829bc7291..7be02ac5fa36 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -85,7 +85,7 @@ Note: More extensive information for getting started with ext4 can be
85* extent format more robust in face of on-disk corruption due to magics, 85* extent format more robust in face of on-disk corruption due to magics,
86* internal redundancy in tree 86* internal redundancy in tree
87* improved file allocation (multi-block alloc) 87* improved file allocation (multi-block alloc)
88* fix 32000 subdirectory limit 88* lift 32000 subdirectory limit imposed by i_links_count[1]
89* nsec timestamps for mtime, atime, ctime, create time 89* nsec timestamps for mtime, atime, ctime, create time
90* inode version field on disk (NFSv4, Lustre) 90* inode version field on disk (NFSv4, Lustre)
91* reduced e2fsck time via uninit_bg feature 91* reduced e2fsck time via uninit_bg feature
@@ -100,6 +100,9 @@ Note: More extensive information for getting started with ext4 can be
100* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force 100* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
101 the ordering) 101 the ordering)
102 102
103[1] Filesystems with a block size of 1k may see a limit imposed by the
104directory hash tree having a maximum depth of two.
105
1032.2 Candidate features for future inclusion 1062.2 Candidate features for future inclusion
104 107
105* Online defrag (patches available but not well tested) 108* Online defrag (patches available but not well tested)
@@ -180,8 +183,8 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata
180 performance. 183 performance.
181 184
182barrier=<0|1(*)> This enables/disables the use of write barriers in 185barrier=<0|1(*)> This enables/disables the use of write barriers in
183 the jbd code. barrier=0 disables, barrier=1 enables. 186barrier(*) the jbd code. barrier=0 disables, barrier=1 enables.
184 This also requires an IO stack which can support 187nobarrier This also requires an IO stack which can support
185 barriers, and if jbd gets an error on a barrier 188 barriers, and if jbd gets an error on a barrier
186 write, it will disable again with a warning. 189 write, it will disable again with a warning.
187 Write barriers enforce proper on-disk ordering 190 Write barriers enforce proper on-disk ordering
@@ -189,6 +192,9 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in
189 safe to use, at some performance penalty. If 192 safe to use, at some performance penalty. If
190 your disks are battery-backed in one way or another, 193 your disks are battery-backed in one way or another,
191 disabling barriers may safely improve performance. 194 disabling barriers may safely improve performance.
195 The mount options "barrier" and "nobarrier" can
196 also be used to enable or disable barriers, for
197 consistency with other ext4 mount options.
192 198
193inode_readahead=n This tuning parameter controls the maximum 199inode_readahead=n This tuning parameter controls the maximum
194 number of inode table blocks that ext4's inode 200 number of inode table blocks that ext4's inode
@@ -229,6 +235,10 @@ minixdf Make 'df' act like Minix.
229 235
230debug Extra debugging information is sent to syslog. 236debug Extra debugging information is sent to syslog.
231 237
238abort Simulate the effects of calling ext4_abort() for
239 debugging purposes. This is normally used while
240 remounting a filesystem which is already mounted.
241
232errors=remount-ro Remount the filesystem read-only on an error. 242errors=remount-ro Remount the filesystem read-only on an error.
233errors=continue Keep going on a filesystem error. 243errors=continue Keep going on a filesystem error.
234errors=panic Panic and halt the machine if an error occurs. 244errors=panic Panic and halt the machine if an error occurs.
@@ -288,7 +298,7 @@ max_batch_time=usec Maximum amount of time ext4 should wait for
288 amount of time (on average) that it takes to 298 amount of time (on average) that it takes to
289 finish committing a transaction. Call this time 299 finish committing a transaction. Call this time
290 the "commit time". If the time that the 300 the "commit time". If the time that the
291 transactoin has been running is less than the 301 transaction has been running is less than the
292 commit time, ext4 will try sleeping for the 302 commit time, ext4 will try sleeping for the
293 commit time to see if other operations will join 303 commit time to see if other operations will join
294 the transaction. The commit time is capped by 304 the transaction. The commit time is capped by
@@ -310,6 +320,24 @@ journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the
310 a slightly higher priority than the default I/O 320 a slightly higher priority than the default I/O
311 priority. 321 priority.
312 322
323auto_da_alloc(*) Many broken applications don't use fsync() when
324noauto_da_alloc replacing existing files via patterns such as
325 fd = open("foo.new")/write(fd,..)/close(fd)/
326 rename("foo.new", "foo"), or worse yet,
327 fd = open("foo", O_TRUNC)/write(fd,..)/close(fd).
328 If auto_da_alloc is enabled, ext4 will detect
329 the replace-via-rename and replace-via-truncate
330 patterns and force that any delayed allocation
331 blocks are allocated such that at the next
332 journal commit, in the default data=ordered
333 mode, the data blocks of the new file are forced
334 to disk before the rename() operation is
335 committed. This provides roughly the same level
336 of guarantees as ext3, and avoids the
337 "zero-length" problem that can happen when a
338 system crashes before the delayed allocation
339 blocks are forced to disk.
340
313Data Mode 341Data Mode
314========= 342=========
315There are 3 different data modes: 343There are 3 different data modes:
@@ -334,7 +362,7 @@ written to the journal first, and then to its final location.
334In the event of a crash, the journal can be replayed, bringing both data and 362In the event of a crash, the journal can be replayed, bringing both data and
335metadata into a consistent state. This mode is the slowest except when data 363metadata into a consistent state. This mode is the slowest except when data
336needs to be read from and written to disk at the same time where it 364needs to be read from and written to disk at the same time where it
337outperforms all others modes. Curently ext4 does not have delayed 365outperforms all others modes. Currently ext4 does not have delayed
338allocation support if this data journalling mode is selected. 366allocation support if this data journalling mode is selected.
339 367
340References 368References
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
index 1e3defcfe50b..606233cd4618 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.txt
@@ -204,7 +204,7 @@ fiemap_check_flags() helper:
204 204
205int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 205int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
206 206
207The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The 207The struct fieinfo should be passed in as received from ioctl_fiemap(). The
208set of fiemap flags which the fs understands should be passed via fs_flags. If 208set of fiemap flags which the fs understands should be passed via fs_flags. If
209fiemap_check_flags finds invalid user flags, it will place the bad values in 209fiemap_check_flags finds invalid user flags, it will place the bad values in
210fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from 210fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 4dae9a3840bf..0494f78d87e4 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -60,7 +60,7 @@ go_lock | Called for the first local holder of a lock
60go_unlock | Called on the final local unlock of a lock 60go_unlock | Called on the final local unlock of a lock
61go_dump | Called to print content of object for debugfs file, or on 61go_dump | Called to print content of object for debugfs file, or on
62 | error to dump glock to the log. 62 | error to dump glock to the log.
63go_type; | The type of the glock, LM_TYPE_..... 63go_type | The type of the glock, LM_TYPE_.....
64go_min_hold_time | The minimum hold time 64go_min_hold_time | The minimum hold time
65 65
66The minimum hold time for each lock is the time after a remote lock 66The minimum hold time for each lock is the time after a remote lock
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
index 593004b6bbab..5e3ab8f3beff 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.txt
@@ -11,18 +11,15 @@ their I/O so file system consistency is maintained. One of the nifty
11features of GFS is perfect consistency -- changes made to the file system 11features of GFS is perfect consistency -- changes made to the file system
12on one machine show up immediately on all other machines in the cluster. 12on one machine show up immediately on all other machines in the cluster.
13 13
14GFS uses interchangable inter-node locking mechanisms. Different lock 14GFS uses interchangable inter-node locking mechanisms, the currently
15modules can plug into GFS and each file system selects the appropriate 15supported mechanisms are:
16lock module at mount time. Lock modules include:
17 16
18 lock_nolock -- allows gfs to be used as a local file system 17 lock_nolock -- allows gfs to be used as a local file system
19 18
20 lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking 19 lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
21 The dlm is found at linux/fs/dlm/ 20 The dlm is found at linux/fs/dlm/
22 21
23In addition to interfacing with an external locking manager, a gfs lock 22Lock_dlm depends on user space cluster management systems found
24module is responsible for interacting with external cluster management
25systems. Lock_dlm depends on user space cluster management systems found
26at the URL above. 23at the URL above.
27 24
28To use gfs as a local file system, no external clustering systems are 25To use gfs as a local file system, no external clustering systems are
@@ -31,13 +28,19 @@ needed, simply:
31 $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device 28 $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
32 $ mount -t gfs2 /dev/block_device /dir 29 $ mount -t gfs2 /dev/block_device /dir
33 30
34GFS2 is not on-disk compatible with previous versions of GFS. 31If you are using Fedora, you need to install the gfs2-utils package
32and, for lock_dlm, you will also need to install the cman package
33and write a cluster.conf as per the documentation.
34
35GFS2 is not on-disk compatible with previous versions of GFS, but it
36is pretty close.
35 37
36The following man pages can be found at the URL above: 38The following man pages can be found at the URL above:
37 gfs2_fsck to repair a filesystem 39 fsck.gfs2 to repair a filesystem
38 gfs2_grow to expand a filesystem online 40 gfs2_grow to expand a filesystem online
39 gfs2_jadd to add journals to a filesystem online 41 gfs2_jadd to add journals to a filesystem online
40 gfs2_tool to manipulate, examine and tune a filesystem 42 gfs2_tool to manipulate, examine and tune a filesystem
41 gfs2_quota to examine and change quota values in a filesystem 43 gfs2_quota to examine and change quota values in a filesystem
44 gfs2_convert to convert a gfs filesystem to gfs2 in-place
42 mount.gfs2 to help mount(8) mount a filesystem 45 mount.gfs2 to help mount(8) mount a filesystem
43 mkfs.gfs2 to make a filesystem 46 mkfs.gfs2 to make a filesystem
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt
index 6973b980ca2a..3c367c3b3608 100644
--- a/Documentation/filesystems/isofs.txt
+++ b/Documentation/filesystems/isofs.txt
@@ -23,8 +23,13 @@ Mount options unique to the isofs filesystem.
23 map=off Do not map non-Rock Ridge filenames to lower case 23 map=off Do not map non-Rock Ridge filenames to lower case
24 map=normal Map non-Rock Ridge filenames to lower case 24 map=normal Map non-Rock Ridge filenames to lower case
25 map=acorn As map=normal but also apply Acorn extensions if present 25 map=acorn As map=normal but also apply Acorn extensions if present
26 mode=xxx Sets the permissions on files to xxx 26 mode=xxx Sets the permissions on files to xxx unless Rock Ridge
27 dmode=xxx Sets the permissions on directories to xxx 27 extensions set the permissions otherwise
28 dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
29 extensions set the permissions otherwise
30 overriderockperm Set permissions on files and directories according to
31 'mode' and 'dmode' even though Rock Ridge extensions are
32 present.
28 nojoliet Ignore Joliet extensions if they are present. 33 nojoliet Ignore Joliet extensions if they are present.
29 norock Ignore Rock Ridge extensions if they are present. 34 norock Ignore Rock Ridge extensions if they are present.
30 hide Completely strip hidden files from the file system. 35 hide Completely strip hidden files from the file system.
diff --git a/Documentation/filesystems/knfsd-stats.txt b/Documentation/filesystems/knfsd-stats.txt
new file mode 100644
index 000000000000..64ced5149d37
--- /dev/null
+++ b/Documentation/filesystems/knfsd-stats.txt
@@ -0,0 +1,159 @@
1
2Kernel NFS Server Statistics
3============================
4
5This document describes the format and semantics of the statistics
6which the kernel NFS server makes available to userspace. These
7statistics are available in several text form pseudo files, each of
8which is described separately below.
9
10In most cases you don't need to know these formats, as the nfsstat(8)
11program from the nfs-utils distribution provides a helpful command-line
12interface for extracting and printing them.
13
14All the files described here are formatted as a sequence of text lines,
15separated by newline '\n' characters. Lines beginning with a hash
16'#' character are comments intended for humans and should be ignored
17by parsing routines. All other lines contain a sequence of fields
18separated by whitespace.
19
20/proc/fs/nfsd/pool_stats
21------------------------
22
23This file is available in kernels from 2.6.30 onwards, if the
24/proc/fs/nfsd filesystem is mounted (it almost always should be).
25
26The first line is a comment which describes the fields present in
27all the other lines. The other lines present the following data as
28a sequence of unsigned decimal numeric fields. One line is shown
29for each NFS thread pool.
30
31All counters are 64 bits wide and wrap naturally. There is no way
32to zero these counters, instead applications should do their own
33rate conversion.
34
35pool
36 The id number of the NFS thread pool to which this line applies.
37 This number does not change.
38
39 Thread pool ids are a contiguous set of small integers starting
40 at zero. The maximum value depends on the thread pool mode, but
41 currently cannot be larger than the number of CPUs in the system.
42 Note that in the default case there will be a single thread pool
43 which contains all the nfsd threads and all the CPUs in the system,
44 and thus this file will have a single line with a pool id of "0".
45
46packets-arrived
47 Counts how many NFS packets have arrived. More precisely, this
48 is the number of times that the network stack has notified the
49 sunrpc server layer that new data may be available on a transport
50 (e.g. an NFS or UDP socket or an NFS/RDMA endpoint).
51
52 Depending on the NFS workload patterns and various network stack
53 effects (such as Large Receive Offload) which can combine packets
54 on the wire, this may be either more or less than the number
55 of NFS calls received (which statistic is available elsewhere).
56 However this is a more accurate and less workload-dependent measure
57 of how much CPU load is being placed on the sunrpc server layer
58 due to NFS network traffic.
59
60sockets-enqueued
61 Counts how many times an NFS transport is enqueued to wait for
62 an nfsd thread to service it, i.e. no nfsd thread was considered
63 available.
64
65 The circumstance this statistic tracks indicates that there was NFS
66 network-facing work to be done but it couldn't be done immediately,
67 thus introducing a small delay in servicing NFS calls. The ideal
68 rate of change for this counter is zero; significantly non-zero
69 values may indicate a performance limitation.
70
71 This can happen either because there are too few nfsd threads in the
72 thread pool for the NFS workload (the workload is thread-limited),
73 or because the NFS workload needs more CPU time than is available in
74 the thread pool (the workload is CPU-limited). In the former case,
75 configuring more nfsd threads will probably improve the performance
76 of the NFS workload. In the latter case, the sunrpc server layer is
77 already choosing not to wake idle nfsd threads because there are too
78 many nfsd threads which want to run but cannot, so configuring more
79 nfsd threads will make no difference whatsoever. The overloads-avoided
80 statistic (see below) can be used to distinguish these cases.
81
82threads-woken
83 Counts how many times an idle nfsd thread is woken to try to
84 receive some data from an NFS transport.
85
86 This statistic tracks the circumstance where incoming
87 network-facing NFS work is being handled quickly, which is a good
88 thing. The ideal rate of change for this counter will be close
89 to but less than the rate of change of the packets-arrived counter.
90
91overloads-avoided
92 Counts how many times the sunrpc server layer chose not to wake an
93 nfsd thread, despite the presence of idle nfsd threads, because
94 too many nfsd threads had been recently woken but could not get
95 enough CPU time to actually run.
96
97 This statistic counts a circumstance where the sunrpc layer
98 heuristically avoids overloading the CPU scheduler with too many
99 runnable nfsd threads. The ideal rate of change for this counter
100 is zero. Significant non-zero values indicate that the workload
101 is CPU limited. Usually this is associated with heavy CPU usage
102 on all the CPUs in the nfsd thread pool.
103
104 If a sustained large overloads-avoided rate is detected on a pool,
105 the top(1) utility should be used to check for the following
106 pattern of CPU usage on all the CPUs associated with the given
107 nfsd thread pool.
108
109 - %us ~= 0 (as you're *NOT* running applications on your NFS server)
110
111 - %wa ~= 0
112
113 - %id ~= 0
114
115 - %sy + %hi + %si ~= 100
116
117 If this pattern is seen, configuring more nfsd threads will *not*
118 improve the performance of the workload. If this patten is not
119 seen, then something more subtle is wrong.
120
121threads-timedout
122 Counts how many times an nfsd thread triggered an idle timeout,
123 i.e. was not woken to handle any incoming network packets for
124 some time.
125
126 This statistic counts a circumstance where there are more nfsd
127 threads configured than can be used by the NFS workload. This is
128 a clue that the number of nfsd threads can be reduced without
129 affecting performance. Unfortunately, it's only a clue and not
130 a strong indication, for a couple of reasons:
131
132 - Currently the rate at which the counter is incremented is quite
133 slow; the idle timeout is 60 minutes. Unless the NFS workload
134 remains constant for hours at a time, this counter is unlikely
135 to be providing information that is still useful.
136
137 - It is usually a wise policy to provide some slack,
138 i.e. configure a few more nfsds than are currently needed,
139 to allow for future spikes in load.
140
141
142Note that incoming packets on NFS transports will be dealt with in
143one of three ways. An nfsd thread can be woken (threads-woken counts
144this case), or the transport can be enqueued for later attention
145(sockets-enqueued counts this case), or the packet can be temporarily
146deferred because the transport is currently being used by an nfsd
147thread. This last case is not very interesting and is not explicitly
148counted, but can be inferred from the other counters thus:
149
150packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
151
152
153More
154----
155Descriptions of the other statistics file should go here.
156
157
158Greg Banks <gnb@sgi.com>
15926 Mar 2009
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index 85eaeaddd27c..e386f7e4bcee 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -100,7 +100,7 @@ Installation
100 $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs 100 $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
101 101
102 In this location, mount.nfs will be invoked automatically for NFS mounts 102 In this location, mount.nfs will be invoked automatically for NFS mounts
103 by the system mount commmand. 103 by the system mount command.
104 104
105 NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed 105 NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
106 on the NFS client machine. You do not need this specific version of 106 on the NFS client machine. You do not need this specific version of
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
new file mode 100644
index 000000000000..05d81cbcb2e1
--- /dev/null
+++ b/Documentation/filesystems/nfs41-server.txt
@@ -0,0 +1,161 @@
1NFSv4.1 Server Implementation
2
3Server support for minorversion 1 can be controlled using the
4/proc/fs/nfsd/versions control file. The string output returned
5by reading this file will contain either "+4.1" or "-4.1"
6correspondingly.
7
8Currently, server support for minorversion 1 is disabled by default.
9It can be enabled at run time by writing the string "+4.1" to
10the /proc/fs/nfsd/versions control file. Note that to write this
11control file, the nfsd service must be taken down. Use your user-mode
12nfs-utils to set this up; see rpc.nfsd(8)
13
14The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
15on the latest NFSv4.1 Internet Draft:
16http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
17
18From the many new features in NFSv4.1 the current implementation
19focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
20"exactly once" semantics and better control and throttling of the
21resources allocated for each client.
22
23Other NFSv4.1 features, Parallel NFS operations in particular,
24are still under development out of tree.
25See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
26for more information.
27
28The table below, taken from the NFSv4.1 document, lists
29the operations that are mandatory to implement (REQ), optional
30(OPT), and NFSv4.0 operations that are required not to implement (MNI)
31in minor version 1. The first column indicates the operations that
32are not supported yet by the linux server implementation.
33
34The OPTIONAL features identified and their abbreviations are as follows:
35 pNFS Parallel NFS
36 FDELG File Delegations
37 DDELG Directory Delegations
38
39The following abbreviations indicate the linux server implementation status.
40 I Implemented NFSv4.1 operations.
41 NS Not Supported.
42 NS* unimplemented optional feature.
43 P pNFS features implemented out of tree.
44 PNS pNFS features that are not supported yet (out of tree).
45
46Operations
47
48 +----------------------+------------+--------------+----------------+
49 | Operation | REQ, REC, | Feature | Definition |
50 | | OPT, or | (REQ, REC, | |
51 | | MNI | or OPT) | |
52 +----------------------+------------+--------------+----------------+
53 | ACCESS | REQ | | Section 18.1 |
54NS | BACKCHANNEL_CTL | REQ | | Section 18.33 |
55NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
56 | CLOSE | REQ | | Section 18.2 |
57 | COMMIT | REQ | | Section 18.3 |
58 | CREATE | REQ | | Section 18.4 |
59I | CREATE_SESSION | REQ | | Section 18.36 |
60NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
61 | DELEGRETURN | OPT | FDELG, | Section 18.6 |
62 | | | DDELG, pNFS | |
63 | | | (REQ) | |
64NS | DESTROY_CLIENTID | REQ | | Section 18.50 |
65I | DESTROY_SESSION | REQ | | Section 18.37 |
66I | EXCHANGE_ID | REQ | | Section 18.35 |
67NS | FREE_STATEID | REQ | | Section 18.38 |
68 | GETATTR | REQ | | Section 18.7 |
69P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
70P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
71 | GETFH | REQ | | Section 18.8 |
72NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
73P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
74P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
75P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
76 | LINK | OPT | | Section 18.9 |
77 | LOCK | REQ | | Section 18.10 |
78 | LOCKT | REQ | | Section 18.11 |
79 | LOCKU | REQ | | Section 18.12 |
80 | LOOKUP | REQ | | Section 18.13 |
81 | LOOKUPP | REQ | | Section 18.14 |
82 | NVERIFY | REQ | | Section 18.15 |
83 | OPEN | REQ | | Section 18.16 |
84NS*| OPENATTR | OPT | | Section 18.17 |
85 | OPEN_CONFIRM | MNI | | N/A |
86 | OPEN_DOWNGRADE | REQ | | Section 18.18 |
87 | PUTFH | REQ | | Section 18.19 |
88 | PUTPUBFH | REQ | | Section 18.20 |
89 | PUTROOTFH | REQ | | Section 18.21 |
90 | READ | REQ | | Section 18.22 |
91 | READDIR | REQ | | Section 18.23 |
92 | READLINK | OPT | | Section 18.24 |
93NS | RECLAIM_COMPLETE | REQ | | Section 18.51 |
94 | RELEASE_LOCKOWNER | MNI | | N/A |
95 | REMOVE | REQ | | Section 18.25 |
96 | RENAME | REQ | | Section 18.26 |
97 | RENEW | MNI | | N/A |
98 | RESTOREFH | REQ | | Section 18.27 |
99 | SAVEFH | REQ | | Section 18.28 |
100 | SECINFO | REQ | | Section 18.29 |
101NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
102 | | | layout (REQ) | Section 13.12 |
103I | SEQUENCE | REQ | | Section 18.46 |
104 | SETATTR | REQ | | Section 18.30 |
105 | SETCLIENTID | MNI | | N/A |
106 | SETCLIENTID_CONFIRM | MNI | | N/A |
107NS | SET_SSV | REQ | | Section 18.47 |
108NS | TEST_STATEID | REQ | | Section 18.48 |
109 | VERIFY | REQ | | Section 18.31 |
110NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
111 | WRITE | REQ | | Section 18.32 |
112
113Callback Operations
114
115 +-------------------------+-----------+-------------+---------------+
116 | Operation | REQ, REC, | Feature | Definition |
117 | | OPT, or | (REQ, REC, | |
118 | | MNI | or OPT) | |
119 +-------------------------+-----------+-------------+---------------+
120 | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
121P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
122NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
123P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
124NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
125NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
126 | CB_RECALL | OPT | FDELG, | Section 20.2 |
127 | | | DDELG, pNFS | |
128 | | | (REQ) | |
129NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
130 | | | DDELG, pNFS | |
131 | | | (REQ) | |
132NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
133NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
134 | | | (REQ) | |
135I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
136 | | | DDELG, pNFS | |
137 | | | (REQ) | |
138NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
139 | | | DDELG, pNFS | |
140 | | | (REQ) | |
141 +-------------------------+-----------+-------------+---------------+
142
143Implementation notes:
144
145EXCHANGE_ID:
146* only SP4_NONE state protection supported
147* implementation ids are ignored
148
149CREATE_SESSION:
150* backchannel attributes are ignored
151* backchannel security parameters are ignored
152
153SEQUENCE:
154* no support for dynamic slot table renegotiation (optional)
155
156nfsv4.1 COMPOUND rules:
157The following cases aren't supported yet:
158* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
159 DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
160* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
161
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
new file mode 100644
index 000000000000..01539f410676
--- /dev/null
+++ b/Documentation/filesystems/nilfs2.txt
@@ -0,0 +1,199 @@
1NILFS2
2------
3
4NILFS2 is a log-structured file system (LFS) supporting continuous
5snapshotting. In addition to versioning capability of the entire file
6system, users can even restore files mistakenly overwritten or
7destroyed just a few seconds ago. Since NILFS2 can keep consistency
8like conventional LFS, it achieves quick recovery after system
9crashes.
10
11NILFS2 creates a number of checkpoints every few seconds or per
12synchronous write basis (unless there is no change). Users can select
13significant versions among continuously created checkpoints, and can
14change them into snapshots which will be preserved until they are
15changed back to checkpoints.
16
17There is no limit on the number of snapshots until the volume gets
18full. Each snapshot is mountable as a read-only file system
19concurrently with its writable mount, and this feature is convenient
20for online backup.
21
22The userland tools are included in nilfs-utils package, which is
23available from the following download page. At least "mkfs.nilfs2",
24"mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called
25cleaner or garbage collector) are required. Details on the tools are
26described in the man pages included in the package.
27
28Project web page: http://www.nilfs.org/en/
29Download page: http://www.nilfs.org/en/download.html
30Git tree web page: http://www.nilfs.org/git/
31NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users
32
33Caveats
34=======
35
36Features which NILFS2 does not support yet:
37
38 - atime
39 - extended attributes
40 - POSIX ACLs
41 - quotas
42 - fsck
43 - resize
44 - defragmentation
45
46Mount options
47=============
48
49NILFS2 supports the following mount options:
50(*) == default
51
52barrier=on(*) This enables/disables barriers. barrier=off disables
53 it, barrier=on enables it.
54errors=continue(*) Keep going on a filesystem error.
55errors=remount-ro Remount the filesystem read-only on an error.
56errors=panic Panic and halt the machine if an error occurs.
57cp=n Specify the checkpoint-number of the snapshot to be
58 mounted. Checkpoints and snapshots are listed by lscp
59 user command. Only the checkpoints marked as snapshot
60 are mountable with this option. Snapshot is read-only,
61 so a read-only mount option must be specified together.
62order=relaxed(*) Apply relaxed order semantics that allows modified data
63 blocks to be written to disk without making a
64 checkpoint if no metadata update is going. This mode
65 is equivalent to the ordered data mode of the ext3
66 filesystem except for the updates on data blocks still
67 conserve atomicity. This will improve synchronous
68 write performance for overwriting.
69order=strict Apply strict in-order semantics that preserves sequence
70 of all file operations including overwriting of data
71 blocks. That means, it is guaranteed that no
72 overtaking of events occurs in the recovered file
73 system after a crash.
74
75NILFS2 usage
76============
77
78To use nilfs2 as a local file system, simply:
79
80 # mkfs -t nilfs2 /dev/block_device
81 # mount -t nilfs2 /dev/block_device /dir
82
83This will also invoke the cleaner through the mount helper program
84(mount.nilfs2).
85
86Checkpoints and snapshots are managed by the following commands.
87Their manpages are included in the nilfs-utils package above.
88
89 lscp list checkpoints or snapshots.
90 mkcp make a checkpoint or a snapshot.
91 chcp change an existing checkpoint to a snapshot or vice versa.
92 rmcp invalidate specified checkpoint(s).
93
94To mount a snapshot,
95
96 # mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir
97
98where <cno> is the checkpoint number of the snapshot.
99
100To unmount the NILFS2 mount point or snapshot, simply:
101
102 # umount /dir
103
104Then, the cleaner daemon is automatically shut down by the umount
105helper program (umount.nilfs2).
106
107Disk format
108===========
109
110A nilfs2 volume is equally divided into a number of segments except
111for the super block (SB) and segment #0. A segment is the container
112of logs. Each log is composed of summary information blocks, payload
113blocks, and an optional super root block (SR):
114
115 ______________________________________________________
116 | |SB| | Segment | Segment | Segment | ... | Segment | |
117 |_|__|_|____0____|____1____|____2____|_____|____N____|_|
118 0 +1K +4K +8M +16M +24M +(8MB x N)
119 . . (Typical offsets for 4KB-block)
120 . .
121 .______________________.
122 | log | log |... | log |
123 |__1__|__2__|____|__m__|
124 . .
125 . .
126 . .
127 .______________________________.
128 | Summary | Payload blocks |SR|
129 |_blocks__|_________________|__|
130
131The payload blocks are organized per file, and each file consists of
132data blocks and B-tree node blocks:
133
134 |<--- File-A --->|<--- File-B --->|
135 _______________________________________________________________
136 | Data blocks | B-tree blocks | Data blocks | B-tree blocks | ...
137 _|_____________|_______________|_____________|_______________|_
138
139
140Since only the modified blocks are written in the log, it may have
141files without data blocks or B-tree node blocks.
142
143The organization of the blocks is recorded in the summary information
144blocks, which contains a header structure (nilfs_segment_summary), per
145file structures (nilfs_finfo), and per block structures (nilfs_binfo):
146
147 _________________________________________________________________________
148 | Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |...
149 |_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___
150
151
152The logs include regular files, directory files, symbolic link files
153and several meta data files. The mata data files are the files used
154to maintain file system meta data. The current version of NILFS2 uses
155the following meta data files:
156
157 1) Inode file (ifile) -- Stores on-disk inodes
158 2) Checkpoint file (cpfile) -- Stores checkpoints
159 3) Segment usage file (sufile) -- Stores allocation state of segments
160 4) Data address translation file -- Maps virtual block numbers to usual
161 (DAT) block numbers. This file serves to
162 make on-disk blocks relocatable.
163
164The following figure shows a typical organization of the logs:
165
166 _________________________________________________________________________
167 | Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR|
168 |_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__|
169
170
171To stride over segment boundaries, this sequence of files may be split
172into multiple logs. The sequence of logs that should be treated as
173logically one log, is delimited with flags marked in the segment
174summary. The recovery code of nilfs2 looks this boundary information
175to ensure atomicity of updates.
176
177The super root block is inserted for every checkpoints. It includes
178three special inodes, inodes for the DAT, cpfile, and sufile. Inodes
179of regular files, directories, symlinks and other special files, are
180included in the ifile. The inode of ifile itself is included in the
181corresponding checkpoint entry in the cpfile. Thus, the hierarchy
182among NILFS2 files can be depicted as follows:
183
184 Super block (SB)
185 |
186 v
187 Super root block (the latest cno=xx)
188 |-- DAT
189 |-- sufile
190 `-- cpfile
191 |-- ifile (cno=c1)
192 |-- ifile (cno=c2) ---- file (ino=i1)
193 : : |-- file (ino=i2)
194 `-- ifile (cno=xx) |-- file (ino=i3)
195 : :
196 `-- file (ino=yy)
197 ( regular file, directory, or symlink )
198
199For detail on the format of each file, please see include/linux/nilfs2_fs.h.
diff --git a/Documentation/filesystems/pohmelfs/design_notes.txt b/Documentation/filesystems/pohmelfs/design_notes.txt
new file mode 100644
index 000000000000..dcf833587162
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/design_notes.txt
@@ -0,0 +1,71 @@
1POHMELFS: Parallel Optimized Host Message Exchange Layered File System.
2
3 Evgeniy Polyakov <zbr@ioremap.net>
4
5Homepage: http://www.ioremap.net/projects/pohmelfs
6
7POHMELFS first began as a network filesystem with coherent local data and
8metadata caches but is now evolving into a parallel distributed filesystem.
9
10Main features of this FS include:
11 * Locally coherent cache for data and metadata with (potentially) byte-range locks.
12 Since all Linux filesystems lock the whole inode during writing, algorithm
13 is very simple and does not use byte-ranges, although they are sent in
14 locking messages.
15 * Completely async processing of all events except creation of hard and symbolic
16 links, and rename events.
17 Object creation and data reading and writing are processed asynchronously.
18 * Flexible object architecture optimized for network processing.
19 Ability to create long paths to objects and remove arbitrarily huge
20 directories with a single network command.
21 (like removing the whole kernel tree via a single network command).
22 * Very high performance.
23 * Fast and scalable multithreaded userspace server. Being in userspace it works
24 with any underlying filesystem and still is much faster than async in-kernel NFS one.
25 * Client is able to switch between different servers (if one goes down, client
26 automatically reconnects to second and so on).
27 * Transactions support. Full failover for all operations.
28 Resending transactions to different servers on timeout or error.
29 * Read request (data read, directory listing, lookup requests) balancing between multiple servers.
30 * Write requests are replicated to multiple servers and completed only when all of them are acked.
31 * Ability to add and/or remove servers from the working set at run-time.
32 * Strong authentification and possible data encryption in network channel.
33 * Extended attributes support.
34
35POHMELFS is based on transactions, which are potentially long-standing objects that live
36in the client's memory. Each transaction contains all the information needed to process a given
37command (or set of commands, which is frequently used during data writing: single transactions
38can contain creation and data writing commands). Transactions are committed by all the servers
39to which they are sent and, in case of failures, are eventually resent or dropped with an error.
40For example, reading will return an error if no servers are available.
41
42POHMELFS uses a asynchronous approach to data processing. Courtesy of transactions, it is
43possible to detach replies from requests and, if the command requires data to be received, the
44caller sleeps waiting for it. Thus, it is possible to issue multiple read commands to different
45servers and async threads will pick up replies in parallel, find appropriate transactions in the
46system and put the data where it belongs (like the page or inode cache).
47
48The main feature of POHMELFS is writeback data and the metadata cache.
49Only a few non-performance critical operations use the write-through cache and
50are synchronous: hard and symbolic link creation, and object rename. Creation,
51removal of objects and data writing are asynchronous and are sent to
52the server during system writeback. Only one writer at a time is allowed for any
53given inode, which is guarded by an appropriate locking protocol.
54Because of this feature, POHMELFS is extremely fast at metadata intensive
55workloads and can fully utilize the bandwidth to the servers when doing bulk
56data transfers.
57
58POHMELFS clients operate with a working set of servers and are capable of balancing read-only
59operations (like lookups or directory listings) between them according to IO priorities.
60Administrators can add or remove servers from the set at run-time via special commands (described
61in Documentation/pohmelfs/info.txt file). Writes are replicated to all servers, which are connected
62with write permission turned on. IO priority and permissions can be changed in run-time.
63
64POHMELFS is capable of full data channel encryption and/or strong crypto hashing.
65One can select any kernel supported cipher, encryption mode, hash type and operation mode
66(hmac or digest). It is also possible to use both or neither (default). Crypto configuration
67is checked during mount time and, if the server does not support it, appropriate capabilities
68will be disabled or mount will fail (if 'crypto_fail_unsupported' mount option is specified).
69Crypto performance heavily depends on the number of crypto threads, which asynchronously perform
70crypto operations and send the resulting data to server or submit it up the stack. This number
71can be controlled via a mount option.
diff --git a/Documentation/filesystems/pohmelfs/info.txt b/Documentation/filesystems/pohmelfs/info.txt
new file mode 100644
index 000000000000..db2e41393626
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/info.txt
@@ -0,0 +1,99 @@
1POHMELFS usage information.
2
3Mount options.
4All but index, number of crypto threads and maximum IO size can changed via remount.
5
6idx=%u
7 Each mountpoint is associated with a special index via this option.
8 Administrator can add or remove servers from the given index, so all mounts,
9 which were attached to it, are updated.
10 Default it is 0.
11
12trans_scan_timeout=%u
13 This timeout, expressed in milliseconds, specifies time to scan transaction
14 trees looking for stale requests, which have to be resent, or if number of
15 retries exceed specified limit, dropped with error.
16 Default is 5 seconds.
17
18drop_scan_timeout=%u
19 Internal timeout, expressed in milliseconds, which specifies how frequently
20 inodes marked to be dropped are freed. It also specifies how frequently
21 the system checks that servers have to be added or removed from current working set.
22 Default is 1 second.
23
24wait_on_page_timeout=%u
25 Number of milliseconds to wait for reply from remote server for data reading command.
26 If this timeout is exceeded, reading returns an error.
27 Default is 5 seconds.
28
29trans_retries=%u
30 This is the number of times that a transaction will be resent to a server that did
31 not answer for the last @trans_scan_timeout milliseconds.
32 When the number of resends exceeds this limit, the transaction is completed with error.
33 Default is 5 resends.
34
35crypto_thread_num=%u
36 Number of crypto processing threads. Threads are used both for RX and TX traffic.
37 Default is 2, or no threads if crypto operations are not supported.
38
39trans_max_pages=%u
40 Maximum number of pages in a single transaction. This parameter also controls
41 the number of pages, allocated for crypto processing (each crypto thread has
42 pool of pages, the number of which is equal to 'trans_max_pages'.
43 Default is 100 pages.
44
45crypto_fail_unsupported
46 If specified, mount will fail if the server does not support requested crypto operations.
47 By default mount will disable non-matching crypto operations.
48
49mcache_timeout=%u
50 Maximum number of milliseconds to wait for the mcache objects to be processed.
51 Mcache includes locks (given lock should be granted by server), attributes (they should be
52 fully received in the given timeframe).
53 Default is 5 seconds.
54
55Usage examples.
56
57Add server server1.net:1025 into the working set with index $idx
58with appropriate hash algorithm and key file and cipher algorithm, mode and key file:
59$cfg A add -a server1.net -p 1025 -i $idx -K $hash_key -k $cipher_key
60
61Mount filesystem with given index $idx to /mnt mountpoint.
62Client will connect to all servers specified in the working set via previous command:
63mount -t pohmel -o idx=$idx q /mnt
64
65Change permissions to read-only (-I 1 option, '-I 2' - write-only, 3 - rw):
66$cfg A modify -a server1.net -p 1025 -i $idx -I 1
67
68Change IO priority to 123 (node with the highest priority gets read requests).
69$cfg A modify -a server1.net -p 1025 -i $idx -P 123
70
71One can check currect status of all connections in the mountstats file:
72# cat /proc/$PID/mountstats
73...
74device none mounted on /mnt with fstype pohmel
75idx addr(:port) socket_type protocol active priority permissions
760 server1.net:1026 1 6 1 250 1
770 server2.net:1025 1 6 1 123 3
78
79Server installation.
80
81Creating a server, which listens at port 1025 and 0.0.0.0 address.
82Working root directory (note, that server chroots there, so you have to have appropriate permissions)
83is set to /mnt, server will negotiate hash/cipher with client, in case client requested it, there
84are appropriate key files.
85Number of working threads is set to 10.
86
87# ./fserver -a 0.0.0.0 -p 1025 -r /mnt -w 10 -K hash_key -k cipher_key
88
89 -A 6 - listen on ipv6 address. Default: Disabled.
90 -r root - path to root directory. Default: /tmp.
91 -a addr - listen address. Default: 0.0.0.0.
92 -p port - listen port. Default: 1025.
93 -w workers - number of workers per connected client. Default: 1.
94 -K file - hash key size. Default: none.
95 -k file - cipher key size. Default: none.
96 -h - this help.
97
98Number of worker threads specifies how many workers will be created for each client.
99Bulk single-client transafers usually are better handled with smaller number (like 1-3).
diff --git a/Documentation/filesystems/pohmelfs/network_protocol.txt b/Documentation/filesystems/pohmelfs/network_protocol.txt
new file mode 100644
index 000000000000..40ea6c295afb
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/network_protocol.txt
@@ -0,0 +1,227 @@
1POHMELFS network protocol.
2
3Basic structure used in network communication is following command:
4
5struct netfs_cmd
6{
7 __u16 cmd; /* Command number */
8 __u16 csize; /* Attached crypto information size */
9 __u16 cpad; /* Attached padding size */
10 __u16 ext; /* External flags */
11 __u32 size; /* Size of the attached data */
12 __u32 trans; /* Transaction id */
13 __u64 id; /* Object ID to operate on. Used for feedback.*/
14 __u64 start; /* Start of the object. */
15 __u64 iv; /* IV sequence */
16 __u8 data[0];
17};
18
19Commands can be embedded into transaction command (which in turn has own command),
20so one can extend protocol as needed without breaking backward compatibility as long
21as old commands are supported. All string lengths include tail 0 byte.
22
23All commans are transfered over the network in big-endian. CPU endianess is used at the end peers.
24
25@cmd - command number, which specifies command to be processed. Following
26 commands are used currently:
27
28 NETFS_READDIR = 1, /* Read directory for given inode number */
29 NETFS_READ_PAGE, /* Read data page from the server */
30 NETFS_WRITE_PAGE, /* Write data page to the server */
31 NETFS_CREATE, /* Create directory entry */
32 NETFS_REMOVE, /* Remove directory entry */
33 NETFS_LOOKUP, /* Lookup single object */
34 NETFS_LINK, /* Create a link */
35 NETFS_TRANS, /* Transaction */
36 NETFS_OPEN, /* Open intent */
37 NETFS_INODE_INFO, /* Metadata cache coherency synchronization message */
38 NETFS_PAGE_CACHE, /* Page cache invalidation message */
39 NETFS_READ_PAGES, /* Read multiple contiguous pages in one go */
40 NETFS_RENAME, /* Rename object */
41 NETFS_CAPABILITIES, /* Capabilities of the client, for example supported crypto */
42 NETFS_LOCK, /* Distributed lock message */
43 NETFS_XATTR_SET, /* Set extended attribute */
44 NETFS_XATTR_GET, /* Get extended attribute */
45
46@ext - external flags. Used by different commands to specify some extra arguments
47 like partial size of the embedded objects or creation flags.
48
49@size - size of the attached data. For NETFS_READ_PAGE and NETFS_READ_PAGES no data is attached,
50 but size of the requested data is incorporated here. It does not include size of the command
51 header (struct netfs_cmd) itself.
52
53@id - id of the object this command operates on. Each command can use it for own purpose.
54
55@start - start of the object this command operates on. Each command can use it for own purpose.
56
57@csize, @cpad - size and padding size of the (attached if needed) crypto information.
58
59Command specifications.
60
61@NETFS_READDIR
62This command is used to sync content of the remote dir to the client.
63
64@ext - length of the path to object.
65@size - the same.
66@id - local inode number of the directory to read.
67@start - zero.
68
69
70@NETFS_READ_PAGE
71This command is used to read data from remote server.
72Data size does not exceed local page cache size.
73
74@id - inode number.
75@start - first byte offset.
76@size - number of bytes to read plus length of the path to object.
77@ext - object path length.
78
79
80@NETFS_CREATE
81Used to create object.
82It does not require that all directories on top of the object were
83already created, it will create them automatically. Each object has
84associated @netfs_path_entry data structure, which contains creation
85mode (permissions and type) and length of the name as long as name itself.
86
87@start - 0
88@size - size of the all data structures needed to create a path
89@id - local inode number
90@ext - 0
91
92
93@NETFS_REMOVE
94Used to remove object.
95
96@ext - length of the path to object.
97@size - the same.
98@id - local inode number.
99@start - zero.
100
101
102@NETFS_LOOKUP
103Lookup information about object on server.
104
105@ext - length of the path to object.
106@size - the same.
107@id - local inode number of the directory to look object in.
108@start - local inode number of the object to look at.
109
110
111@NETFS_LINK
112Create hard of symlink.
113Command is sent as "object_path|target_path".
114
115@size - size of the above string.
116@id - parent local inode number.
117@start - 1 for symlink, 0 for hardlink.
118@ext - size of the "object_path" above.
119
120
121@NETFS_TRANS
122Transaction header.
123
124@size - incorporates all embedded command sizes including theirs header sizes.
125@start - transaction generation number - unique id used to find transaction.
126@ext - transaction flags. Unused at the moment.
127@id - 0.
128
129
130@NETFS_OPEN
131Open intent for given transaction.
132
133@id - local inode number.
134@start - 0.
135@size - path length to the object.
136@ext - open flags (O_RDWR and so on).
137
138
139@NETFS_INODE_INFO
140Metadata update command.
141It is sent to servers when attributes of the object are changed and received
142when data or metadata were updated. It operates with the following structure:
143
144struct netfs_inode_info
145{
146 unsigned int mode;
147 unsigned int nlink;
148 unsigned int uid;
149 unsigned int gid;
150 unsigned int blocksize;
151 unsigned int padding;
152 __u64 ino;
153 __u64 blocks;
154 __u64 rdev;
155 __u64 size;
156 __u64 version;
157};
158
159It effectively mirrors stat(2) returned data.
160
161
162@ext - path length to the object.
163@size - the same plus size of the netfs_inode_info structure.
164@id - local inode number.
165@start - 0.
166
167
168@NETFS_PAGE_CACHE
169Command is only received by clients. It contains information about
170page to be marked as not up-to-date.
171
172@id - client's inode number.
173@start - last byte of the page to be invalidated. If it is not equal to
174 current inode size, it will be vmtruncated().
175@size - 0
176@ext - 0
177
178
179@NETFS_READ_PAGES
180Used to read multiple contiguous pages in one go.
181
182@start - first byte of the contiguous region to read.
183@size - contains of two fields: lower 8 bits are used to represent page cache shift
184 used by client, another 3 bytes are used to get number of pages.
185@id - local inode number.
186@ext - path length to the object.
187
188
189@NETFS_RENAME
190Used to rename object.
191Attached data is formed into following string: "old_path|new_path".
192
193@id - local inode number.
194@start - parent inode number.
195@size - length of the above string.
196@ext - length of the old path part.
197
198
199@NETFS_CAPABILITIES
200Used to exchange crypto capabilities with server.
201If crypto capabilities are not supported by server, then client will disable it
202or fail (if 'crypto_fail_unsupported' mount options was specified).
203
204@id - superblock index. Used to specify crypto information for group of servers.
205@size - size of the attached capabilities structure.
206@start - 0.
207@size - 0.
208@scsize - 0.
209
210@NETFS_LOCK
211Used to send lock request/release messages. Although it sends byte range request
212and is capable of flushing pages based on that, it is not used, since all Linux
213filesystems lock the whole inode.
214
215@id - lock generation number.
216@start - start of the locked range.
217@size - size of the locked range.
218@ext - lock type: read/write. Not used actually. 15'th bit is used to determine,
219 if it is lock request (1) or release (0).
220
221@NETFS_XATTR_SET
222@NETFS_XATTR_GET
223Used to set/get extended attributes for given inode.
224@id - attribute generation number or xattr setting type
225@start - size of the attribute (request or attached)
226@size - name length, path len and data size for given attribute
227@ext - path length for given object
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 830bad7cce0f..fad18f9456e4 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -5,10 +5,12 @@
5 Bodo Bauer <bb@ricochet.net> 5 Bodo Bauer <bb@ricochet.net>
6 6
72.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000 72.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
8move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
8------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
9Version 1.3 Kernel version 2.2.12 10Version 1.3 Kernel version 2.2.12
10 Kernel version 2.4.0-test11-pre4 11 Kernel version 2.4.0-test11-pre4
11------------------------------------------------------------------------------ 12------------------------------------------------------------------------------
13fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
12 14
13Table of Contents 15Table of Contents
14----------------- 16-----------------
@@ -26,25 +28,17 @@ Table of Contents
26 1.6 Parallel port info in /proc/parport 28 1.6 Parallel port info in /proc/parport
27 1.7 TTY info in /proc/tty 29 1.7 TTY info in /proc/tty
28 1.8 Miscellaneous kernel statistics in /proc/stat 30 1.8 Miscellaneous kernel statistics in /proc/stat
31 1.9 Ext4 file system parameters
29 32
30 2 Modifying System Parameters 33 2 Modifying System Parameters
31 2.1 /proc/sys/fs - File system data 34
32 2.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats 35 3 Per-Process Parameters
33 2.3 /proc/sys/kernel - general kernel parameters 36 3.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
34 2.4 /proc/sys/vm - The virtual memory subsystem 37 3.2 /proc/<pid>/oom_score - Display current oom-killer score
35 2.5 /proc/sys/dev - Device specific parameters 38 3.3 /proc/<pid>/io - Display the IO accounting fields
36 2.6 /proc/sys/sunrpc - Remote procedure calls 39 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
37 2.7 /proc/sys/net - Networking stuff 40 3.5 /proc/<pid>/mountinfo - Information about mounts
38 2.8 /proc/sys/net/ipv4 - IPV4 settings 41
39 2.9 Appletalk
40 2.10 IPX
41 2.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
42 2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
43 2.13 /proc/<pid>/oom_score - Display current oom-killer score
44 2.14 /proc/<pid>/io - Display the IO accounting fields
45 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
46 2.16 /proc/<pid>/mountinfo - Information about mounts
47 2.17 /proc/sys/fs/epoll - Configuration options for the epoll interface
48 42
49------------------------------------------------------------------------------ 43------------------------------------------------------------------------------
50Preface 44Preface
@@ -123,7 +117,7 @@ The link self points to the process reading the file system. Each process
123subdirectory has the entries listed in Table 1-1. 117subdirectory has the entries listed in Table 1-1.
124 118
125 119
126Table 1-1: Process specific entries in /proc 120Table 1-1: Process specific entries in /proc
127.............................................................................. 121..............................................................................
128 File Content 122 File Content
129 clear_refs Clears page referenced bits shown in smaps output 123 clear_refs Clears page referenced bits shown in smaps output
@@ -141,46 +135,103 @@ Table 1-1: Process specific entries in /proc
141 status Process status in human readable form 135 status Process status in human readable form
142 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan 136 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
143 stack Report full stack trace, enable via CONFIG_STACKTRACE 137 stack Report full stack trace, enable via CONFIG_STACKTRACE
144 smaps Extension based on maps, the rss size for each mapped file 138 smaps a extension based on maps, showing the memory consumption of
139 each mapping
145.............................................................................. 140..............................................................................
146 141
147For example, to get the status information of a process, all you have to do is 142For example, to get the status information of a process, all you have to do is
148read the file /proc/PID/status: 143read the file /proc/PID/status:
149 144
150 >cat /proc/self/status 145 >cat /proc/self/status
151 Name: cat 146 Name: cat
152 State: R (running) 147 State: R (running)
153 Pid: 5452 148 Tgid: 5452
154 PPid: 743 149 Pid: 5452
150 PPid: 743
155 TracerPid: 0 (2.4) 151 TracerPid: 0 (2.4)
156 Uid: 501 501 501 501 152 Uid: 501 501 501 501
157 Gid: 100 100 100 100 153 Gid: 100 100 100 100
158 Groups: 100 14 16 154 FDSize: 256
159 VmSize: 1112 kB 155 Groups: 100 14 16
160 VmLck: 0 kB 156 VmPeak: 5004 kB
161 VmRSS: 348 kB 157 VmSize: 5004 kB
162 VmData: 24 kB 158 VmLck: 0 kB
163 VmStk: 12 kB 159 VmHWM: 476 kB
164 VmExe: 8 kB 160 VmRSS: 476 kB
165 VmLib: 1044 kB 161 VmData: 156 kB
166 SigPnd: 0000000000000000 162 VmStk: 88 kB
167 SigBlk: 0000000000000000 163 VmExe: 68 kB
168 SigIgn: 0000000000000000 164 VmLib: 1412 kB
169 SigCgt: 0000000000000000 165 VmPTE: 20 kb
170 CapInh: 00000000fffffeff 166 Threads: 1
171 CapPrm: 0000000000000000 167 SigQ: 0/28578
172 CapEff: 0000000000000000 168 SigPnd: 0000000000000000
173 169 ShdPnd: 0000000000000000
170 SigBlk: 0000000000000000
171 SigIgn: 0000000000000000
172 SigCgt: 0000000000000000
173 CapInh: 00000000fffffeff
174 CapPrm: 0000000000000000
175 CapEff: 0000000000000000
176 CapBnd: ffffffffffffffff
177 voluntary_ctxt_switches: 0
178 nonvoluntary_ctxt_switches: 1
174 179
175This shows you nearly the same information you would get if you viewed it with 180This shows you nearly the same information you would get if you viewed it with
176the ps command. In fact, ps uses the proc file system to obtain its 181the ps command. In fact, ps uses the proc file system to obtain its
177information. The statm file contains more detailed information about the 182information. But you get a more detailed view of the process by reading the
178process memory usage. Its seven fields are explained in Table 1-2. The stat 183file /proc/PID/status. It fields are described in table 1-2.
179file contains details information about the process itself. Its fields are
180explained in Table 1-3.
181 184
185The statm file contains more detailed information about the process
186memory usage. Its seven fields are explained in Table 1-3. The stat file
187contains details information about the process itself. Its fields are
188explained in Table 1-4.
182 189
183Table 1-2: Contents of the statm files (as of 2.6.8-rc3) 190Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
191..............................................................................
192 Field Content
193 Name filename of the executable
194 State state (R is running, S is sleeping, D is sleeping
195 in an uninterruptible wait, Z is zombie,
196 T is traced or stopped)
197 Tgid thread group ID
198 Pid process id
199 PPid process id of the parent process
200 TracerPid PID of process tracing this process (0 if not)
201 Uid Real, effective, saved set, and file system UIDs
202 Gid Real, effective, saved set, and file system GIDs
203 FDSize number of file descriptor slots currently allocated
204 Groups supplementary group list
205 VmPeak peak virtual memory size
206 VmSize total program size
207 VmLck locked memory size
208 VmHWM peak resident set size ("high water mark")
209 VmRSS size of memory portions
210 VmData size of data, stack, and text segments
211 VmStk size of data, stack, and text segments
212 VmExe size of text segment
213 VmLib size of shared library code
214 VmPTE size of page table entries
215 Threads number of threads
216 SigQ number of signals queued/max. number for queue
217 SigPnd bitmap of pending signals for the thread
218 ShdPnd bitmap of shared pending signals for the process
219 SigBlk bitmap of blocked signals
220 SigIgn bitmap of ignored signals
221 SigCgt bitmap of catched signals
222 CapInh bitmap of inheritable capabilities
223 CapPrm bitmap of permitted capabilities
224 CapEff bitmap of effective capabilities
225 CapBnd bitmap of capabilities bounding set
226 Cpus_allowed mask of CPUs on which this process may run
227 Cpus_allowed_list Same as previous, but in "list format"
228 Mems_allowed mask of memory nodes allowed to this process
229 Mems_allowed_list Same as previous, but in "list format"
230 voluntary_ctxt_switches number of voluntary context switches
231 nonvoluntary_ctxt_switches number of non voluntary context switches
232..............................................................................
233
234Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
184.............................................................................. 235..............................................................................
185 Field Content 236 Field Content
186 size total program size (pages) (same as VmSize in status) 237 size total program size (pages) (same as VmSize in status)
@@ -195,7 +246,7 @@ Table 1-2: Contents of the statm files (as of 2.6.8-rc3)
195.............................................................................. 246..............................................................................
196 247
197 248
198Table 1-3: Contents of the stat files (as of 2.6.22-rc3) 249Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
199.............................................................................. 250..............................................................................
200 Field Content 251 Field Content
201 pid process id 252 pid process id
@@ -229,10 +280,10 @@ Table 1-3: Contents of the stat files (as of 2.6.22-rc3)
229 start_stack address of the start of the stack 280 start_stack address of the start of the stack
230 esp current value of ESP 281 esp current value of ESP
231 eip current value of EIP 282 eip current value of EIP
232 pending bitmap of pending signals (obsolete) 283 pending bitmap of pending signals
233 blocked bitmap of blocked signals (obsolete) 284 blocked bitmap of blocked signals
234 sigign bitmap of ignored signals (obsolete) 285 sigign bitmap of ignored signals
235 sigcatch bitmap of catched signals (obsolete) 286 sigcatch bitmap of catched signals
236 wchan address where process went to sleep 287 wchan address where process went to sleep
237 0 (place holder) 288 0 (place holder)
238 0 (place holder) 289 0 (place holder)
@@ -241,19 +292,99 @@ Table 1-3: Contents of the stat files (as of 2.6.22-rc3)
241 rt_priority realtime priority 292 rt_priority realtime priority
242 policy scheduling policy (man sched_setscheduler) 293 policy scheduling policy (man sched_setscheduler)
243 blkio_ticks time spent waiting for block IO 294 blkio_ticks time spent waiting for block IO
295 gtime guest time of the task in jiffies
296 cgtime guest time of the task children in jiffies
244.............................................................................. 297..............................................................................
245 298
299The /proc/PID/map file containing the currently mapped memory regions and
300their access permissions.
301
302The format is:
303
304address perms offset dev inode pathname
305
30608048000-08049000 r-xp 00000000 03:00 8312 /opt/test
30708049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
3080804a000-0806b000 rw-p 00000000 00:00 0 [heap]
309a7cb1000-a7cb2000 ---p 00000000 00:00 0
310a7cb2000-a7eb2000 rw-p 00000000 00:00 0
311a7eb2000-a7eb3000 ---p 00000000 00:00 0
312a7eb3000-a7ed5000 rw-p 00000000 00:00 0
313a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
314a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
315a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
316a800b000-a800e000 rw-p 00000000 00:00 0
317a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
318a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
319a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
320a8024000-a8027000 rw-p 00000000 00:00 0
321a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
322a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
323a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
324aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
325ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
326
327where "address" is the address space in the process that it occupies, "perms"
328is a set of permissions:
329
330 r = read
331 w = write
332 x = execute
333 s = shared
334 p = private (copy on write)
335
336"offset" is the offset into the mapping, "dev" is the device (major:minor), and
337"inode" is the inode on that device. 0 indicates that no inode is associated
338with the memory region, as the case would be with BSS (uninitialized data).
339The "pathname" shows the name associated file for this mapping. If the mapping
340is not associated with a file:
341
342 [heap] = the heap of the program
343 [stack] = the stack of the main process
344 [vdso] = the "virtual dynamic shared object",
345 the kernel system call handler
346
347 or if empty, the mapping is anonymous.
348
349
350The /proc/PID/smaps is an extension based on maps, showing the memory
351consumption for each of the process's mappings. For each of mappings there
352is a series of lines such as the following:
353
35408048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
355Size: 1084 kB
356Rss: 892 kB
357Pss: 374 kB
358Shared_Clean: 892 kB
359Shared_Dirty: 0 kB
360Private_Clean: 0 kB
361Private_Dirty: 0 kB
362Referenced: 892 kB
363Swap: 0 kB
364KernelPageSize: 4 kB
365MMUPageSize: 4 kB
366
367The first of these lines shows the same information as is displayed for the
368mapping in /proc/PID/maps. The remaining lines show the size of the mapping,
369the amount of the mapping that is currently resident in RAM, the "proportional
370set size” (divide each shared page by the number of processes sharing it), the
371number of clean and dirty shared pages in the mapping, and the number of clean
372and dirty private pages in the mapping. The "Referenced" indicates the amount
373of memory currently marked as referenced or accessed.
374
375This file is only present if the CONFIG_MMU kernel configuration option is
376enabled.
246 377
2471.2 Kernel data 3781.2 Kernel data
248--------------- 379---------------
249 380
250Similar to the process entries, the kernel data files give information about 381Similar to the process entries, the kernel data files give information about
251the running kernel. The files used to obtain this information are contained in 382the running kernel. The files used to obtain this information are contained in
252/proc and are listed in Table 1-4. Not all of these will be present in your 383/proc and are listed in Table 1-5. Not all of these will be present in your
253system. It depends on the kernel configuration and the loaded modules, which 384system. It depends on the kernel configuration and the loaded modules, which
254files are there, and which are missing. 385files are there, and which are missing.
255 386
256Table 1-4: Kernel info in /proc 387Table 1-5: Kernel info in /proc
257.............................................................................. 388..............................................................................
258 File Content 389 File Content
259 apm Advanced power management info 390 apm Advanced power management info
@@ -290,6 +421,7 @@ Table 1-4: Kernel info in /proc
290 rtc Real time clock 421 rtc Real time clock
291 scsi SCSI info (see text) 422 scsi SCSI info (see text)
292 slabinfo Slab pool info 423 slabinfo Slab pool info
424 softirqs softirq usage
293 stat Overall statistics 425 stat Overall statistics
294 swaps Swap space utilization 426 swaps Swap space utilization
295 sys See chapter 2 427 sys See chapter 2
@@ -373,7 +505,7 @@ just those considered 'most important'. The new vectors are:
373 RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are 505 RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
374 sent from one CPU to another per the needs of the OS. Typically, 506 sent from one CPU to another per the needs of the OS. Typically,
375 their statistics are used by kernel developers and interested users to 507 their statistics are used by kernel developers and interested users to
376 determine the occurance of interrupt of the given type. 508 determine the occurrence of interrupts of the given type.
377 509
378The above IRQ vectors are displayed only when relevent. For example, 510The above IRQ vectors are displayed only when relevent. For example,
379the threshold vector does not exist on x86_64 platforms. Others are 511the threshold vector does not exist on x86_64 platforms. Others are
@@ -558,7 +690,7 @@ Committed_AS: The amount of memory presently allocated on the system.
558 memory once that memory has been successfully allocated. 690 memory once that memory has been successfully allocated.
559VmallocTotal: total size of vmalloc memory area 691VmallocTotal: total size of vmalloc memory area
560 VmallocUsed: amount of vmalloc area which is used 692 VmallocUsed: amount of vmalloc area which is used
561VmallocChunk: largest contigious block of vmalloc area which is free 693VmallocChunk: largest contiguous block of vmalloc area which is free
562 694
563.............................................................................. 695..............................................................................
564 696
@@ -604,6 +736,25 @@ on the kind of area :
6040xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... 7360xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
605 pages=10 vmalloc N0=10 737 pages=10 vmalloc N0=10
606 738
739..............................................................................
740
741softirqs:
742
743Provides counts of softirq handlers serviced since boot time, for each cpu.
744
745> cat /proc/softirqs
746 CPU0 CPU1 CPU2 CPU3
747 HI: 0 0 0 0
748 TIMER: 27166 27120 27097 27034
749 NET_TX: 0 0 0 17
750 NET_RX: 42 0 0 39
751 BLOCK: 0 0 107 1121
752 TASKLET: 0 0 0 290
753 SCHED: 27035 26983 26971 26746
754 HRTIMER: 0 0 0 0
755 RCU: 1678 1769 2178 2250
756
757
6071.3 IDE devices in /proc/ide 7581.3 IDE devices in /proc/ide
608---------------------------- 759----------------------------
609 760
@@ -621,10 +772,10 @@ IDE devices:
621 772
622More detailed information can be found in the controller specific 773More detailed information can be found in the controller specific
623subdirectories. These are named ide0, ide1 and so on. Each of these 774subdirectories. These are named ide0, ide1 and so on. Each of these
624directories contains the files shown in table 1-5. 775directories contains the files shown in table 1-6.
625 776
626 777
627Table 1-5: IDE controller info in /proc/ide/ide? 778Table 1-6: IDE controller info in /proc/ide/ide?
628.............................................................................. 779..............................................................................
629 File Content 780 File Content
630 channel IDE channel (0 or 1) 781 channel IDE channel (0 or 1)
@@ -634,11 +785,11 @@ Table 1-5: IDE controller info in /proc/ide/ide?
634.............................................................................. 785..............................................................................
635 786
636Each device connected to a controller has a separate subdirectory in the 787Each device connected to a controller has a separate subdirectory in the
637controllers directory. The files listed in table 1-6 are contained in these 788controllers directory. The files listed in table 1-7 are contained in these
638directories. 789directories.
639 790
640 791
641Table 1-6: IDE device information 792Table 1-7: IDE device information
642.............................................................................. 793..............................................................................
643 File Content 794 File Content
644 cache The cache 795 cache The cache
@@ -680,12 +831,12 @@ the drive parameters:
6801.4 Networking info in /proc/net 8311.4 Networking info in /proc/net
681-------------------------------- 832--------------------------------
682 833
683The subdirectory /proc/net follows the usual pattern. Table 1-6 shows the 834The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the
684additional values you get for IP version 6 if you configure the kernel to 835additional values you get for IP version 6 if you configure the kernel to
685support this. Table 1-7 lists the files and their meaning. 836support this. Table 1-9 lists the files and their meaning.
686 837
687 838
688Table 1-6: IPv6 info in /proc/net 839Table 1-8: IPv6 info in /proc/net
689.............................................................................. 840..............................................................................
690 File Content 841 File Content
691 udp6 UDP sockets (IPv6) 842 udp6 UDP sockets (IPv6)
@@ -700,7 +851,7 @@ Table 1-6: IPv6 info in /proc/net
700.............................................................................. 851..............................................................................
701 852
702 853
703Table 1-7: Network info in /proc/net 854Table 1-9: Network info in /proc/net
704.............................................................................. 855..............................................................................
705 File Content 856 File Content
706 arp Kernel ARP table 857 arp Kernel ARP table
@@ -824,10 +975,10 @@ The directory /proc/parport contains information about the parallel ports of
824your system. It has one subdirectory for each port, named after the port 975your system. It has one subdirectory for each port, named after the port
825number (0,1,2,...). 976number (0,1,2,...).
826 977
827These directories contain the four files shown in Table 1-8. 978These directories contain the four files shown in Table 1-10.
828 979
829 980
830Table 1-8: Files in /proc/parport 981Table 1-10: Files in /proc/parport
831.............................................................................. 982..............................................................................
832 File Content 983 File Content
833 autoprobe Any IEEE-1284 device ID information that has been acquired. 984 autoprobe Any IEEE-1284 device ID information that has been acquired.
@@ -845,10 +996,10 @@ Table 1-8: Files in /proc/parport
845 996
846Information about the available and actually used tty's can be found in the 997Information about the available and actually used tty's can be found in the
847directory /proc/tty.You'll find entries for drivers and line disciplines in 998directory /proc/tty.You'll find entries for drivers and line disciplines in
848this directory, as shown in Table 1-9. 999this directory, as shown in Table 1-11.
849 1000
850 1001
851Table 1-9: Files in /proc/tty 1002Table 1-11: Files in /proc/tty
852.............................................................................. 1003..............................................................................
853 File Content 1004 File Content
854 drivers list of drivers and their usage 1005 drivers list of drivers and their usage
@@ -890,6 +1041,7 @@ since the system first booted. For a quick look, simply cat the file:
890 processes 2915 1041 processes 2915
891 procs_running 1 1042 procs_running 1
892 procs_blocked 0 1043 procs_blocked 0
1044 softirq 183433 0 21755 12 39 1137 231 21459 2263
893 1045
894The very first "cpu" line aggregates the numbers in all of the other "cpuN" 1046The very first "cpu" line aggregates the numbers in all of the other "cpuN"
895lines. These numbers identify the amount of time the CPU has spent performing 1047lines. These numbers identify the amount of time the CPU has spent performing
@@ -925,6 +1077,11 @@ CPUs.
925The "procs_blocked" line gives the number of processes currently blocked, 1077The "procs_blocked" line gives the number of processes currently blocked,
926waiting for I/O to complete. 1078waiting for I/O to complete.
927 1079
1080The "softirq" line gives counts of softirqs serviced since boot time, for each
1081of the possible system softirqs. The first column is the total of all
1082softirqs serviced; each subsequent column is the total for that particular
1083softirq.
1084
928 1085
9291.9 Ext4 file system parameters 10861.9 Ext4 file system parameters
930------------------------------ 1087------------------------------
@@ -933,34 +1090,13 @@ Information about mounted ext4 file systems can be found in
933/proc/fs/ext4. Each mounted filesystem will have a directory in 1090/proc/fs/ext4. Each mounted filesystem will have a directory in
934/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or 1091/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
935/proc/fs/ext4/dm-0). The files in each per-device directory are shown 1092/proc/fs/ext4/dm-0). The files in each per-device directory are shown
936in Table 1-10, below. 1093in Table 1-12, below.
937 1094
938Table 1-10: Files in /proc/fs/ext4/<devname> 1095Table 1-12: Files in /proc/fs/ext4/<devname>
939.............................................................................. 1096..............................................................................
940 File Content 1097 File Content
941 mb_groups details of multiblock allocator buddy cache of free blocks 1098 mb_groups details of multiblock allocator buddy cache of free blocks
942 mb_history multiblock allocation history 1099 mb_history multiblock allocation history
943 stats controls whether the multiblock allocator should start
944 collecting statistics, which are shown during the unmount
945 group_prealloc the multiblock allocator will round up allocation
946 requests to a multiple of this tuning parameter if the
947 stripe size is not set in the ext4 superblock
948 max_to_scan The maximum number of extents the multiblock allocator
949 will search to find the best extent
950 min_to_scan The minimum number of extents the multiblock allocator
951 will search to find the best extent
952 order2_req Tuning parameter which controls the minimum size for
953 requests (as a power of 2) where the buddy cache is
954 used
955 stream_req Files which have fewer blocks than this tunable
956 parameter will have their blocks allocated out of a
957 block group specific preallocation pool, so that small
958 files are packed closely together. Each large file
959 will have its blocks allocated out of its own unique
960 preallocation pool.
961inode_readahead Tuning parameter which controls the maximum number of
962 inode table blocks that ext4's inode table readahead
963 algorithm will pre-read into the buffer cache
964.............................................................................. 1100..............................................................................
965 1101
966 1102
@@ -1011,1028 +1147,33 @@ review the kernel documentation in the directory /usr/src/linux/Documentation.
1011This chapter is heavily based on the documentation included in the pre 2.2 1147This chapter is heavily based on the documentation included in the pre 2.2
1012kernels, and became part of it in version 2.2.1 of the Linux kernel. 1148kernels, and became part of it in version 2.2.1 of the Linux kernel.
1013 1149
10142.1 /proc/sys/fs - File system data 1150Please see: Documentation/sysctls/ directory for descriptions of these
1015-----------------------------------
1016
1017This subdirectory contains specific file system, file handle, inode, dentry
1018and quota information.
1019
1020Currently, these files are in /proc/sys/fs:
1021
1022dentry-state
1023------------
1024
1025Status of the directory cache. Since directory entries are dynamically
1026allocated and deallocated, this file indicates the current status. It holds
1027six values, in which the last two are not used and are always zero. The others
1028are listed in table 2-1.
1029
1030
1031Table 2-1: Status files of the directory cache
1032..............................................................................
1033 File Content
1034 nr_dentry Almost always zero
1035 nr_unused Number of unused cache entries
1036 age_limit
1037 in seconds after the entry may be reclaimed, when memory is short
1038 want_pages internally
1039..............................................................................
1040
1041dquot-nr and dquot-max
1042----------------------
1043
1044The file dquot-max shows the maximum number of cached disk quota entries.
1045
1046The file dquot-nr shows the number of allocated disk quota entries and the
1047number of free disk quota entries.
1048
1049If the number of available cached disk quotas is very low and you have a large
1050number of simultaneous system users, you might want to raise the limit.
1051
1052file-nr and file-max
1053--------------------
1054
1055The kernel allocates file handles dynamically, but doesn't free them again at
1056this time.
1057
1058The value in file-max denotes the maximum number of file handles that the
1059Linux kernel will allocate. When you get a lot of error messages about running
1060out of file handles, you might want to raise this limit. The default value is
106110% of RAM in kilobytes. To change it, just write the new number into the
1062file:
1063
1064 # cat /proc/sys/fs/file-max
1065 4096
1066 # echo 8192 > /proc/sys/fs/file-max
1067 # cat /proc/sys/fs/file-max
1068 8192
1069
1070
1071This method of revision is useful for all customizable parameters of the
1072kernel - simply echo the new value to the corresponding file.
1073
1074Historically, the three values in file-nr denoted the number of allocated file
1075handles, the number of allocated but unused file handles, and the maximum
1076number of file handles. Linux 2.6 always reports 0 as the number of free file
1077handles -- this is not an error, it just means that the number of allocated
1078file handles exactly matches the number of used file handles.
1079
1080Attempts to allocate more file descriptors than file-max are reported with
1081printk, look for "VFS: file-max limit <number> reached".
1082
1083inode-state and inode-nr
1084------------------------
1085
1086The file inode-nr contains the first two items from inode-state, so we'll skip
1087to that file...
1088
1089inode-state contains two actual numbers and five dummy values. The numbers
1090are nr_inodes and nr_free_inodes (in order of appearance).
1091
1092nr_inodes
1093~~~~~~~~~
1094
1095Denotes the number of inodes the system has allocated. This number will
1096grow and shrink dynamically.
1097
1098nr_open
1099-------
1100
1101Denotes the maximum number of file-handles a process can
1102allocate. Default value is 1024*1024 (1048576) which should be
1103enough for most machines. Actual limit depends on RLIMIT_NOFILE
1104resource limit.
1105
1106nr_free_inodes
1107--------------
1108
1109Represents the number of free inodes. Ie. The number of inuse inodes is
1110(nr_inodes - nr_free_inodes).
1111
1112aio-nr and aio-max-nr
1113---------------------
1114
1115aio-nr is the running total of the number of events specified on the
1116io_setup system call for all currently active aio contexts. If aio-nr
1117reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
1118raising aio-max-nr does not result in the pre-allocation or re-sizing
1119of any kernel data structures.
1120
11212.2 /proc/sys/fs/binfmt_misc - Miscellaneous binary formats
1122-----------------------------------------------------------
1123
1124Besides these files, there is the subdirectory /proc/sys/fs/binfmt_misc. This
1125handles the kernel support for miscellaneous binary formats.
1126
1127Binfmt_misc provides the ability to register additional binary formats to the
1128Kernel without compiling an additional module/kernel. Therefore, binfmt_misc
1129needs to know magic numbers at the beginning or the filename extension of the
1130binary.
1131
1132It works by maintaining a linked list of structs that contain a description of
1133a binary format, including a magic with size (or the filename extension),
1134offset and mask, and the interpreter name. On request it invokes the given
1135interpreter with the original program as argument, as binfmt_java and
1136binfmt_em86 and binfmt_mz do. Since binfmt_misc does not define any default
1137binary-formats, you have to register an additional binary-format.
1138
1139There are two general files in binfmt_misc and one file per registered format.
1140The two general files are register and status.
1141
1142Registering a new binary format
1143-------------------------------
1144
1145To register a new binary format you have to issue the command
1146
1147 echo :name:type:offset:magic:mask:interpreter: > /proc/sys/fs/binfmt_misc/register
1148
1149
1150
1151with appropriate name (the name for the /proc-dir entry), offset (defaults to
11520, if omitted), magic, mask (which can be omitted, defaults to all 0xff) and
1153last but not least, the interpreter that is to be invoked (for example and
1154testing /bin/echo). Type can be M for usual magic matching or E for filename
1155extension matching (give extension in place of magic).
1156
1157Check or reset the status of the binary format handler
1158------------------------------------------------------
1159
1160If you do a cat on the file /proc/sys/fs/binfmt_misc/status, you will get the
1161current status (enabled/disabled) of binfmt_misc. Change the status by echoing
11620 (disables) or 1 (enables) or -1 (caution: this clears all previously
1163registered binary formats) to status. For example echo 0 > status to disable
1164binfmt_misc (temporarily).
1165
1166Status of a single handler
1167--------------------------
1168
1169Each registered handler has an entry in /proc/sys/fs/binfmt_misc. These files
1170perform the same function as status, but their scope is limited to the actual
1171binary format. By cating this file, you also receive all related information
1172about the interpreter/magic of the binfmt.
1173
1174Example usage of binfmt_misc (emulate binfmt_java)
1175--------------------------------------------------
1176
1177 cd /proc/sys/fs/binfmt_misc
1178 echo ':Java:M::\xca\xfe\xba\xbe::/usr/local/java/bin/javawrapper:' > register
1179 echo ':HTML:E::html::/usr/local/java/bin/appletviewer:' > register
1180 echo ':Applet:M::<!--applet::/usr/local/java/bin/appletviewer:' > register
1181 echo ':DEXE:M::\x0eDEX::/usr/bin/dosexec:' > register
1182
1183
1184These four lines add support for Java executables and Java applets (like
1185binfmt_java, additionally recognizing the .html extension with no need to put
1186<!--applet> to every applet file). You have to install the JDK and the
1187shell-script /usr/local/java/bin/javawrapper too. It works around the
1188brokenness of the Java filename handling. To add a Java binary, just create a
1189link to the class-file somewhere in the path.
1190
11912.3 /proc/sys/kernel - general kernel parameters
1192------------------------------------------------
1193
1194This directory reflects general kernel behaviors. As I've said before, the
1195contents depend on your configuration. Here you'll find the most important
1196files, along with descriptions of what they mean and how to use them.
1197
1198acct
1199----
1200
1201The file contains three values; highwater, lowwater, and frequency.
1202
1203It exists only when BSD-style process accounting is enabled. These values
1204control its behavior. If the free space on the file system where the log lives
1205goes below lowwater percentage, accounting suspends. If it goes above
1206highwater percentage, accounting resumes. Frequency determines how often you
1207check the amount of free space (value is in seconds). Default settings are: 4,
12082, and 30. That is, suspend accounting if there is less than 2 percent free;
1209resume it if we have a value of 3 or more percent; consider information about
1210the amount of free space valid for 30 seconds
1211
1212ctrl-alt-del
1213------------
1214
1215When the value in this file is 0, ctrl-alt-del is trapped and sent to the init
1216program to handle a graceful restart. However, when the value is greater that
1217zero, Linux's reaction to this key combination will be an immediate reboot,
1218without syncing its dirty buffers.
1219
1220[NOTE]
1221 When a program (like dosemu) has the keyboard in raw mode, the
1222 ctrl-alt-del is intercepted by the program before it ever reaches the
1223 kernel tty layer, and it is up to the program to decide what to do with
1224 it.
1225
1226domainname and hostname
1227-----------------------
1228
1229These files can be controlled to set the NIS domainname and hostname of your
1230box. For the classic darkstar.frop.org a simple:
1231
1232 # echo "darkstar" > /proc/sys/kernel/hostname
1233 # echo "frop.org" > /proc/sys/kernel/domainname
1234
1235
1236would suffice to set your hostname and NIS domainname.
1237
1238osrelease, ostype and version
1239-----------------------------
1240
1241The names make it pretty obvious what these fields contain:
1242
1243 > cat /proc/sys/kernel/osrelease
1244 2.2.12
1245
1246 > cat /proc/sys/kernel/ostype
1247 Linux
1248
1249 > cat /proc/sys/kernel/version
1250 #4 Fri Oct 1 12:41:14 PDT 1999
1251
1252
1253The files osrelease and ostype should be clear enough. Version needs a little
1254more clarification. The #4 means that this is the 4th kernel built from this
1255source base and the date after it indicates the time the kernel was built. The
1256only way to tune these values is to rebuild the kernel.
1257
1258panic
1259-----
1260
1261The value in this file represents the number of seconds the kernel waits
1262before rebooting on a panic. When you use the software watchdog, the
1263recommended setting is 60. If set to 0, the auto reboot after a kernel panic
1264is disabled, which is the default setting.
1265
1266printk
1267------
1268
1269The four values in printk denote
1270* console_loglevel,
1271* default_message_loglevel,
1272* minimum_console_loglevel and
1273* default_console_loglevel
1274respectively.
1275
1276These values influence printk() behavior when printing or logging error
1277messages, which come from inside the kernel. See syslog(2) for more
1278information on the different log levels.
1279
1280console_loglevel
1281----------------
1282
1283Messages with a higher priority than this will be printed to the console.
1284
1285default_message_level
1286---------------------
1287
1288Messages without an explicit priority will be printed with this priority.
1289
1290minimum_console_loglevel
1291------------------------
1292
1293Minimum (highest) value to which the console_loglevel can be set.
1294
1295default_console_loglevel
1296------------------------
1297
1298Default value for console_loglevel.
1299
1300sg-big-buff
1301-----------
1302
1303This file shows the size of the generic SCSI (sg) buffer. At this point, you
1304can't tune it yet, but you can change it at compile time by editing
1305include/scsi/sg.h and changing the value of SG_BIG_BUFF.
1306
1307If you use a scanner with SANE (Scanner Access Now Easy) you might want to set
1308this to a higher value. Refer to the SANE documentation on this issue.
1309
1310modprobe
1311--------
1312
1313The location where the modprobe binary is located. The kernel uses this
1314program to load modules on demand.
1315
1316unknown_nmi_panic
1317-----------------
1318
1319The value in this file affects behavior of handling NMI. When the value is
1320non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
1321debugging information is displayed on console.
1322
1323NMI switch that most IA32 servers have fires unknown NMI up, for example.
1324If a system hangs up, try pressing the NMI switch.
1325
1326panic_on_unrecovered_nmi
1327------------------------
1328
1329The default Linux behaviour on an NMI of either memory or unknown is to continue
1330operation. For many environments such as scientific computing it is preferable
1331that the box is taken out and the error dealt with than an uncorrected
1332parity/ECC error get propogated.
1333
1334A small number of systems do generate NMI's for bizarre random reasons such as
1335power management so the default is off. That sysctl works like the existing
1336panic controls already in that directory.
1337
1338nmi_watchdog
1339------------
1340
1341Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
1342the NMI watchdog is enabled and will continuously test all online cpus to
1343determine whether or not they are still functioning properly. Currently,
1344passing "nmi_watchdog=" parameter at boot time is required for this function
1345to work.
1346
1347If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
1348NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
1349oprofile may have more registers to utilize.
1350
1351msgmni
1352------
1353
1354Maximum number of message queue ids on the system.
1355This value scales to the amount of lowmem. It is automatically recomputed
1356upon memory add/remove or ipc namespace creation/removal.
1357When a value is written into this file, msgmni's value becomes fixed, i.e. it
1358is not recomputed anymore when one of the above events occurs.
1359Use auto_msgmni to change this behavior.
1360
1361auto_msgmni
1362-----------
1363
1364Enables/Disables automatic recomputing of msgmni upon memory add/remove or
1365upon ipc namespace creation/removal (see the msgmni description above).
1366Echoing "1" into this file enables msgmni automatic recomputing.
1367Echoing "0" turns it off.
1368auto_msgmni default value is 1.
1369
1370
13712.4 /proc/sys/vm - The virtual memory subsystem
1372-----------------------------------------------
1373
1374Please see: Documentation/sysctls/vm.txt for a description of these
1375entries. 1151entries.
1376 1152
1153------------------------------------------------------------------------------
1154Summary
1155------------------------------------------------------------------------------
1156Certain aspects of kernel behavior can be modified at runtime, without the
1157need to recompile the kernel, or even to reboot the system. The files in the
1158/proc/sys tree can not only be read, but also modified. You can use the echo
1159command to write value into these files, thereby changing the default settings
1160of the kernel.
1161------------------------------------------------------------------------------
1377 1162
13782.5 /proc/sys/dev - Device specific parameters 1163------------------------------------------------------------------------------
1379---------------------------------------------- 1164CHAPTER 3: PER-PROCESS PARAMETERS
1380 1165------------------------------------------------------------------------------
1381Currently there is only support for CDROM drives, and for those, there is only
1382one read-only file containing information about the CD-ROM drives attached to
1383the system:
1384
1385 >cat /proc/sys/dev/cdrom/info
1386 CD-ROM information, Id: cdrom.c 2.55 1999/04/25
1387
1388 drive name: sr0 hdb
1389 drive speed: 32 40
1390 drive # of slots: 1 0
1391 Can close tray: 1 1
1392 Can open tray: 1 1
1393 Can lock tray: 1 1
1394 Can change speed: 1 1
1395 Can select disk: 0 1
1396 Can read multisession: 1 1
1397 Can read MCN: 1 1
1398 Reports media changed: 1 1
1399 Can play audio: 1 1
1400
1401
1402You see two drives, sr0 and hdb, along with a list of their features.
1403
14042.6 /proc/sys/sunrpc - Remote procedure calls
1405---------------------------------------------
1406
1407This directory contains four files, which enable or disable debugging for the
1408RPC functions NFS, NFS-daemon, RPC and NLM. The default values are 0. They can
1409be set to one to turn debugging on. (The default value is 0 for each)
1410
14112.7 /proc/sys/net - Networking stuff
1412------------------------------------
1413
1414The interface to the networking parts of the kernel is located in
1415/proc/sys/net. Table 2-3 shows all possible subdirectories. You may see only
1416some of them, depending on your kernel's configuration.
1417
1418
1419Table 2-3: Subdirectories in /proc/sys/net
1420..............................................................................
1421 Directory Content Directory Content
1422 core General parameter appletalk Appletalk protocol
1423 unix Unix domain sockets netrom NET/ROM
1424 802 E802 protocol ax25 AX25
1425 ethernet Ethernet protocol rose X.25 PLP layer
1426 ipv4 IP version 4 x25 X.25 protocol
1427 ipx IPX token-ring IBM token ring
1428 bridge Bridging decnet DEC net
1429 ipv6 IP version 6
1430..............................................................................
1431
1432We will concentrate on IP networking here. Since AX15, X.25, and DEC Net are
1433only minor players in the Linux world, we'll skip them in this chapter. You'll
1434find some short info on Appletalk and IPX further on in this chapter. Review
1435the online documentation and the kernel source to get a detailed view of the
1436parameters for those protocols. In this section we'll discuss the
1437subdirectories printed in bold letters in the table above. As default values
1438are suitable for most needs, there is no need to change these values.
1439
1440/proc/sys/net/core - Network core options
1441-----------------------------------------
1442
1443rmem_default
1444------------
1445
1446The default setting of the socket receive buffer in bytes.
1447
1448rmem_max
1449--------
1450
1451The maximum receive socket buffer size in bytes.
1452
1453wmem_default
1454------------
1455
1456The default setting (in bytes) of the socket send buffer.
1457
1458wmem_max
1459--------
1460
1461The maximum send socket buffer size in bytes.
1462
1463message_burst and message_cost
1464------------------------------
1465
1466These parameters are used to limit the warning messages written to the kernel
1467log from the networking code. They enforce a rate limit to make a
1468denial-of-service attack impossible. A higher message_cost factor, results in
1469fewer messages that will be written. Message_burst controls when messages will
1470be dropped. The default settings limit warning messages to one every five
1471seconds.
1472
1473warnings
1474--------
1475
1476This controls console messages from the networking stack that can occur because
1477of problems on the network like duplicate address or bad checksums. Normally,
1478this should be enabled, but if the problem persists the messages can be
1479disabled.
1480
1481netdev_budget
1482-------------
1483
1484Maximum number of packets taken from all interfaces in one polling cycle (NAPI
1485poll). In one polling cycle interfaces which are registered to polling are
1486probed in a round-robin manner. The limit of packets in one such probe can be
1487set per-device via sysfs class/net/<device>/weight .
1488
1489netdev_max_backlog
1490------------------
1491
1492Maximum number of packets, queued on the INPUT side, when the interface
1493receives packets faster than kernel can process them.
1494
1495optmem_max
1496----------
1497
1498Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
1499of struct cmsghdr structures with appended data.
1500
1501/proc/sys/net/unix - Parameters for Unix domain sockets
1502-------------------------------------------------------
1503
1504There are only two files in this subdirectory. They control the delays for
1505deleting and destroying socket descriptors.
1506
15072.8 /proc/sys/net/ipv4 - IPV4 settings
1508--------------------------------------
1509
1510IP version 4 is still the most used protocol in Unix networking. It will be
1511replaced by IP version 6 in the next couple of years, but for the moment it's
1512the de facto standard for the internet and is used in most networking
1513environments around the world. Because of the importance of this protocol,
1514we'll have a deeper look into the subtree controlling the behavior of the IPv4
1515subsystem of the Linux kernel.
1516
1517Let's start with the entries in /proc/sys/net/ipv4.
1518
1519ICMP settings
1520-------------
1521
1522icmp_echo_ignore_all and icmp_echo_ignore_broadcasts
1523----------------------------------------------------
1524
1525Turn on (1) or off (0), if the kernel should ignore all ICMP ECHO requests, or
1526just those to broadcast and multicast addresses.
1527
1528Please note that if you accept ICMP echo requests with a broadcast/multi\-cast
1529destination address your network may be used as an exploder for denial of
1530service packet flooding attacks to other hosts.
1531
1532icmp_destunreach_rate, icmp_echoreply_rate, icmp_paramprob_rate and icmp_timeexeed_rate
1533---------------------------------------------------------------------------------------
1534
1535Sets limits for sending ICMP packets to specific targets. A value of zero
1536disables all limiting. Any positive value sets the maximum package rate in
1537hundredth of a second (on Intel systems).
1538
1539IP settings
1540-----------
1541
1542ip_autoconfig
1543-------------
1544
1545This file contains the number one if the host received its IP configuration by
1546RARP, BOOTP, DHCP or a similar mechanism. Otherwise it is zero.
1547
1548ip_default_ttl
1549--------------
1550
1551TTL (Time To Live) for IPv4 interfaces. This is simply the maximum number of
1552hops a packet may travel.
1553
1554ip_dynaddr
1555----------
1556
1557Enable dynamic socket address rewriting on interface address change. This is
1558useful for dialup interface with changing IP addresses.
1559
1560ip_forward
1561----------
1562
1563Enable or disable forwarding of IP packages between interfaces. Changing this
1564value resets all other parameters to their default values. They differ if the
1565kernel is configured as host or router.
1566
1567ip_local_port_range
1568-------------------
1569
1570Range of ports used by TCP and UDP to choose the local port. Contains two
1571numbers, the first number is the lowest port, the second number the highest
1572local port. Default is 1024-4999. Should be changed to 32768-61000 for
1573high-usage systems.
1574
1575ip_no_pmtu_disc
1576---------------
1577
1578Global switch to turn path MTU discovery off. It can also be set on a per
1579socket basis by the applications or on a per route basis.
1580
1581ip_masq_debug
1582-------------
1583
1584Enable/disable debugging of IP masquerading.
1585
1586IP fragmentation settings
1587-------------------------
1588
1589ipfrag_high_trash and ipfrag_low_trash
1590--------------------------------------
1591
1592Maximum memory used to reassemble IP fragments. When ipfrag_high_thresh bytes
1593of memory is allocated for this purpose, the fragment handler will toss
1594packets until ipfrag_low_thresh is reached.
1595
1596ipfrag_time
1597-----------
1598
1599Time in seconds to keep an IP fragment in memory.
1600
1601TCP settings
1602------------
1603
1604tcp_ecn
1605-------
1606
1607This file controls the use of the ECN bit in the IPv4 headers. This is a new
1608feature about Explicit Congestion Notification, but some routers and firewalls
1609block traffic that has this bit set, so it could be necessary to echo 0 to
1610/proc/sys/net/ipv4/tcp_ecn if you want to talk to these sites. For more info
1611you could read RFC2481.
1612
1613tcp_retrans_collapse
1614--------------------
1615
1616Bug-to-bug compatibility with some broken printers. On retransmit, try to send
1617larger packets to work around bugs in certain TCP stacks. Can be turned off by
1618setting it to zero.
1619
1620tcp_keepalive_probes
1621--------------------
1622
1623Number of keep alive probes TCP sends out, until it decides that the
1624connection is broken.
1625
1626tcp_keepalive_time
1627------------------
1628
1629How often TCP sends out keep alive messages, when keep alive is enabled. The
1630default is 2 hours.
1631
1632tcp_syn_retries
1633---------------
1634
1635Number of times initial SYNs for a TCP connection attempt will be
1636retransmitted. Should not be higher than 255. This is only the timeout for
1637outgoing connections, for incoming connections the number of retransmits is
1638defined by tcp_retries1.
1639
1640tcp_sack
1641--------
1642
1643Enable select acknowledgments after RFC2018.
1644
1645tcp_timestamps
1646--------------
1647
1648Enable timestamps as defined in RFC1323.
1649
1650tcp_stdurg
1651----------
1652
1653Enable the strict RFC793 interpretation of the TCP urgent pointer field. The
1654default is to use the BSD compatible interpretation of the urgent pointer
1655pointing to the first byte after the urgent data. The RFC793 interpretation is
1656to have it point to the last byte of urgent data. Enabling this option may
1657lead to interoperability problems. Disabled by default.
1658
1659tcp_syncookies
1660--------------
1661
1662Only valid when the kernel was compiled with CONFIG_SYNCOOKIES. Send out
1663syncookies when the syn backlog queue of a socket overflows. This is to ward
1664off the common 'syn flood attack'. Disabled by default.
1665
1666Note that the concept of a socket backlog is abandoned. This means the peer
1667may not receive reliable error messages from an over loaded server with
1668syncookies enabled.
1669
1670tcp_window_scaling
1671------------------
1672
1673Enable window scaling as defined in RFC1323.
1674
1675tcp_fin_timeout
1676---------------
1677
1678The length of time in seconds it takes to receive a final FIN before the
1679socket is always closed. This is strictly a violation of the TCP
1680specification, but required to prevent denial-of-service attacks.
1681
1682tcp_max_ka_probes
1683-----------------
1684
1685Indicates how many keep alive probes are sent per slow timer run. Should not
1686be set too high to prevent bursts.
1687
1688tcp_max_syn_backlog
1689-------------------
1690
1691Length of the per socket backlog queue. Since Linux 2.2 the backlog specified
1692in listen(2) only specifies the length of the backlog queue of already
1693established sockets. When more connection requests arrive Linux starts to drop
1694packets. When syncookies are enabled the packets are still answered and the
1695maximum queue is effectively ignored.
1696
1697tcp_retries1
1698------------
1699
1700Defines how often an answer to a TCP connection request is retransmitted
1701before giving up.
1702
1703tcp_retries2
1704------------
1705
1706Defines how often a TCP packet is retransmitted before giving up.
1707
1708Interface specific settings
1709---------------------------
1710
1711In the directory /proc/sys/net/ipv4/conf you'll find one subdirectory for each
1712interface the system knows about and one directory calls all. Changes in the
1713all subdirectory affect all interfaces, whereas changes in the other
1714subdirectories affect only one interface. All directories have the same
1715entries:
1716
1717accept_redirects
1718----------------
1719
1720This switch decides if the kernel accepts ICMP redirect messages or not. The
1721default is 'yes' if the kernel is configured for a regular host and 'no' for a
1722router configuration.
1723
1724accept_source_route
1725-------------------
1726
1727Should source routed packages be accepted or declined. The default is
1728dependent on the kernel configuration. It's 'yes' for routers and 'no' for
1729hosts.
1730
1731bootp_relay
1732~~~~~~~~~~~
1733
1734Accept packets with source address 0.b.c.d with destinations not to this host
1735as local ones. It is supposed that a BOOTP relay daemon will catch and forward
1736such packets.
1737
1738The default is 0, since this feature is not implemented yet (kernel version
17392.2.12).
1740
1741forwarding
1742----------
1743
1744Enable or disable IP forwarding on this interface.
1745
1746log_martians
1747------------
1748
1749Log packets with source addresses with no known route to kernel log.
1750
1751mc_forwarding
1752-------------
1753
1754Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE and a
1755multicast routing daemon is required.
1756
1757proxy_arp
1758---------
1759
1760Does (1) or does not (0) perform proxy ARP.
1761
1762rp_filter
1763---------
1764
1765Integer value determines if a source validation should be made. 1 means yes, 0
1766means no. Disabled by default, but local/broadcast address spoofing is always
1767on.
1768
1769If you set this to 1 on a router that is the only connection for a network to
1770the net, it will prevent spoofing attacks against your internal networks
1771(external addresses can still be spoofed), without the need for additional
1772firewall rules.
1773
1774secure_redirects
1775----------------
1776
1777Accept ICMP redirect messages only for gateways, listed in default gateway
1778list. Enabled by default.
1779
1780shared_media
1781------------
1782
1783If it is not set the kernel does not assume that different subnets on this
1784device can communicate directly. Default setting is 'yes'.
1785
1786send_redirects
1787--------------
1788
1789Determines whether to send ICMP redirects to other hosts.
1790
1791Routing settings
1792----------------
1793
1794The directory /proc/sys/net/ipv4/route contains several file to control
1795routing issues.
1796
1797error_burst and error_cost
1798--------------------------
1799
1800These parameters are used to limit how many ICMP destination unreachable to
1801send from the host in question. ICMP destination unreachable messages are
1802sent when we cannot reach the next hop while trying to transmit a packet.
1803It will also print some error messages to kernel logs if someone is ignoring
1804our ICMP redirects. The higher the error_cost factor is, the fewer
1805destination unreachable and error messages will be let through. Error_burst
1806controls when destination unreachable messages and error messages will be
1807dropped. The default settings limit warning messages to five every second.
1808
1809flush
1810-----
1811
1812Writing to this file results in a flush of the routing cache.
1813
1814gc_elasticity, gc_interval, gc_min_interval_ms, gc_timeout, gc_thresh
1815---------------------------------------------------------------------
1816
1817Values to control the frequency and behavior of the garbage collection
1818algorithm for the routing cache. gc_min_interval is deprecated and replaced
1819by gc_min_interval_ms.
1820
1821
1822max_size
1823--------
1824
1825Maximum size of the routing cache. Old entries will be purged once the cache
1826reached has this size.
1827
1828redirect_load, redirect_number
1829------------------------------
1830
1831Factors which determine if more ICPM redirects should be sent to a specific
1832host. No redirects will be sent once the load limit or the maximum number of
1833redirects has been reached.
1834
1835redirect_silence
1836----------------
1837
1838Timeout for redirects. After this period redirects will be sent again, even if
1839this has been stopped, because the load or number limit has been reached.
1840
1841Network Neighbor handling
1842-------------------------
1843
1844Settings about how to handle connections with direct neighbors (nodes attached
1845to the same link) can be found in the directory /proc/sys/net/ipv4/neigh.
1846
1847As we saw it in the conf directory, there is a default subdirectory which
1848holds the default values, and one directory for each interface. The contents
1849of the directories are identical, with the single exception that the default
1850settings contain additional options to set garbage collection parameters.
1851
1852In the interface directories you'll find the following entries:
1853
1854base_reachable_time, base_reachable_time_ms
1855-------------------------------------------
1856
1857A base value used for computing the random reachable time value as specified
1858in RFC2461.
1859
1860Expression of base_reachable_time, which is deprecated, is in seconds.
1861Expression of base_reachable_time_ms is in milliseconds.
1862
1863retrans_time, retrans_time_ms
1864-----------------------------
1865
1866The time between retransmitted Neighbor Solicitation messages.
1867Used for address resolution and to determine if a neighbor is
1868unreachable.
1869
1870Expression of retrans_time, which is deprecated, is in 1/100 seconds (for
1871IPv4) or in jiffies (for IPv6).
1872Expression of retrans_time_ms is in milliseconds.
1873
1874unres_qlen
1875----------
1876
1877Maximum queue length for a pending arp request - the number of packets which
1878are accepted from other layers while the ARP address is still resolved.
1879
1880anycast_delay
1881-------------
1882
1883Maximum for random delay of answers to neighbor solicitation messages in
1884jiffies (1/100 sec). Not yet implemented (Linux does not have anycast support
1885yet).
1886
1887ucast_solicit
1888-------------
1889
1890Maximum number of retries for unicast solicitation.
1891
1892mcast_solicit
1893-------------
1894
1895Maximum number of retries for multicast solicitation.
1896
1897delay_first_probe_time
1898----------------------
1899
1900Delay for the first time probe if the neighbor is reachable. (see
1901gc_stale_time)
1902
1903locktime
1904--------
1905
1906An ARP/neighbor entry is only replaced with a new one if the old is at least
1907locktime old. This prevents ARP cache thrashing.
1908
1909proxy_delay
1910-----------
1911
1912Maximum time (real time is random [0..proxytime]) before answering to an ARP
1913request for which we have an proxy ARP entry. In some cases, this is used to
1914prevent network flooding.
1915
1916proxy_qlen
1917----------
1918
1919Maximum queue length of the delayed proxy arp timer. (see proxy_delay).
1920
1921app_solicit
1922----------
1923
1924Determines the number of requests to send to the user level ARP daemon. Use 0
1925to turn off.
1926
1927gc_stale_time
1928-------------
1929
1930Determines how often to check for stale ARP entries. After an ARP entry is
1931stale it will be resolved again (which is useful when an IP address migrates
1932to another machine). When ucast_solicit is greater than 0 it first tries to
1933send an ARP packet directly to the known host When that fails and
1934mcast_solicit is greater than 0, an ARP request is broadcasted.
1935
19362.9 Appletalk
1937-------------
1938
1939The /proc/sys/net/appletalk directory holds the Appletalk configuration data
1940when Appletalk is loaded. The configurable parameters are:
1941
1942aarp-expiry-time
1943----------------
1944
1945The amount of time we keep an ARP entry before expiring it. Used to age out
1946old hosts.
1947
1948aarp-resolve-time
1949-----------------
1950
1951The amount of time we will spend trying to resolve an Appletalk address.
1952
1953aarp-retransmit-limit
1954---------------------
1955
1956The number of times we will retransmit a query before giving up.
1957
1958aarp-tick-time
1959--------------
1960
1961Controls the rate at which expires are checked.
1962
1963The directory /proc/net/appletalk holds the list of active Appletalk sockets
1964on a machine.
1965
1966The fields indicate the DDP type, the local address (in network:node format)
1967the remote address, the size of the transmit pending queue, the size of the
1968received queue (bytes waiting for applications to read) the state and the uid
1969owning the socket.
1970
1971/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
1972shows the name of the interface, its Appletalk address, the network range on
1973that address (or network number for phase 1 networks), and the status of the
1974interface.
1975
1976/proc/net/atalk_route lists each known network route. It lists the target
1977(network) that the route leads to, the router (may be directly connected), the
1978route flags, and the device the route is using.
1979
19802.10 IPX
1981--------
1982
1983The IPX protocol has no tunable values in proc/sys/net.
1984
1985The IPX protocol does, however, provide proc/net/ipx. This lists each IPX
1986socket giving the local and remote addresses in Novell format (that is
1987network:node:port). In accordance with the strange Novell tradition,
1988everything but the port is in hex. Not_Connected is displayed for sockets that
1989are not tied to a specific remote address. The Tx and Rx queue sizes indicate
1990the number of bytes pending for transmission and reception. The state
1991indicates the state the socket is in and the uid is the owning uid of the
1992socket.
1993
1994The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
1995it gives the network number, the node number, and indicates if the network is
1996the primary network. It also indicates which device it is bound to (or
1997Internal for internal networks) and the Frame Type if appropriate. Linux
1998supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
1999IPX.
2000
2001The /proc/net/ipx_route table holds a list of IPX routes. For each route it
2002gives the destination network, the router node (or Directly) and the network
2003address of the router (or Connected) for internal networks.
2004
20052.11 /proc/sys/fs/mqueue - POSIX message queues filesystem
2006----------------------------------------------------------
2007
2008The "mqueue" filesystem provides the necessary kernel features to enable the
2009creation of a user space library that implements the POSIX message queues
2010API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
2011Interfaces specification.)
2012
2013The "mqueue" filesystem contains values for determining/setting the amount of
2014resources used by the file system.
2015
2016/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
2017maximum number of message queues allowed on the system.
2018
2019/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
2020maximum number of messages in a queue value. In fact it is the limiting value
2021for another (user) limit which is set in mq_open invocation. This attribute of
2022a queue must be less or equal then msg_max.
2023
2024/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
2025maximum message size value (it is every message queue's attribute set during
2026its creation).
2027 1166
20282.12 /proc/<pid>/oom_adj - Adjust the oom-killer score 11673.1 /proc/<pid>/oom_adj - Adjust the oom-killer score
2029------------------------------------------------------ 1168------------------------------------------------------
2030 1169
2031This file can be used to adjust the score used to select which processes 1170This file can be used to adjust the score used to select which processes should
2032should be killed in an out-of-memory situation. Giving it a high score will 1171be killed in an out-of-memory situation. The oom_adj value is a characteristic
2033increase the likelihood of this process being killed by the oom-killer. Valid 1172of the task's mm, so all threads that share an mm with pid will have the same
2034values are in the range -16 to +15, plus the special value -17, which disables 1173oom_adj value. A high value will increase the likelihood of this process being
2035oom-killing altogether for this process. 1174killed by the oom-killer. Valid values are in the range -16 to +15 as
1175explained below and a special value of -17, which disables oom-killing
1176altogether for threads sharing pid's mm.
2036 1177
2037The process to be killed in an out-of-memory situation is selected among all others 1178The process to be killed in an out-of-memory situation is selected among all others
2038based on its badness score. This value equals the original memory size of the process 1179based on its badness score. This value equals the original memory size of the process
@@ -2046,6 +1187,9 @@ the parent's score if they do not share the same memory. Thus forking servers
2046are the prime candidates to be killed. Having only one 'hungry' child will make 1187are the prime candidates to be killed. Having only one 'hungry' child will make
2047parent less preferable than the child. 1188parent less preferable than the child.
2048 1189
1190/proc/<pid>/oom_adj cannot be changed for kthreads since they are immune from
1191oom-killing already.
1192
2049/proc/<pid>/oom_score shows process' current badness score. 1193/proc/<pid>/oom_score shows process' current badness score.
2050 1194
2051The following heuristics are then applied: 1195The following heuristics are then applied:
@@ -2062,25 +1206,15 @@ The task with the highest badness score is then selected and its children
2062are killed, process itself will be killed in an OOM situation when it does 1206are killed, process itself will be killed in an OOM situation when it does
2063not have children or some of them disabled oom like described above. 1207not have children or some of them disabled oom like described above.
2064 1208
20652.13 /proc/<pid>/oom_score - Display current oom-killer score 12093.2 /proc/<pid>/oom_score - Display current oom-killer score
2066------------------------------------------------------------- 1210-------------------------------------------------------------
2067 1211
2068------------------------------------------------------------------------------
2069This file can be used to check the current score used by the oom-killer is for 1212This file can be used to check the current score used by the oom-killer is for
2070any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which 1213any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which
2071process should be killed in an out-of-memory situation. 1214process should be killed in an out-of-memory situation.
2072 1215
2073------------------------------------------------------------------------------
2074Summary
2075------------------------------------------------------------------------------
2076Certain aspects of kernel behavior can be modified at runtime, without the
2077need to recompile the kernel, or even to reboot the system. The files in the
2078/proc/sys tree can not only be read, but also modified. You can use the echo
2079command to write value into these files, thereby changing the default settings
2080of the kernel.
2081------------------------------------------------------------------------------
2082 1216
20832.14 /proc/<pid>/io - Display the IO accounting fields 12173.3 /proc/<pid>/io - Display the IO accounting fields
2084------------------------------------------------------- 1218-------------------------------------------------------
2085 1219
2086This file contains IO statistics for each running process 1220This file contains IO statistics for each running process
@@ -2182,7 +1316,7 @@ those 64-bit counters, process A could see an intermediate result.
2182More information about this can be found within the taskstats documentation in 1316More information about this can be found within the taskstats documentation in
2183Documentation/accounting. 1317Documentation/accounting.
2184 1318
21852.15 /proc/<pid>/coredump_filter - Core dump filtering settings 13193.4 /proc/<pid>/coredump_filter - Core dump filtering settings
2186--------------------------------------------------------------- 1320---------------------------------------------------------------
2187When a process is dumped, all anonymous memory is written to a core file as 1321When a process is dumped, all anonymous memory is written to a core file as
2188long as the size of the core file isn't limited. But sometimes we don't want 1322long as the size of the core file isn't limited. But sometimes we don't want
@@ -2226,7 +1360,7 @@ For example:
2226 $ echo 0x7 > /proc/self/coredump_filter 1360 $ echo 0x7 > /proc/self/coredump_filter
2227 $ ./some_program 1361 $ ./some_program
2228 1362
22292.16 /proc/<pid>/mountinfo - Information about mounts 13633.5 /proc/<pid>/mountinfo - Information about mounts
2230-------------------------------------------------------- 1364--------------------------------------------------------
2231 1365
2232This file contains lines of the form: 1366This file contains lines of the form:
@@ -2263,30 +1397,3 @@ For more information on mount propagation see:
2263 1397
2264 Documentation/filesystems/sharedsubtree.txt 1398 Documentation/filesystems/sharedsubtree.txt
2265 1399
22662.17 /proc/sys/fs/epoll - Configuration options for the epoll interface
2267--------------------------------------------------------
2268
2269This directory contains configuration options for the epoll(7) interface.
2270
2271max_user_instances
2272------------------
2273
2274This is the maximum number of epoll file descriptors that a single user can
2275have open at a given time. The default value is 128, and should be enough
2276for normal users.
2277
2278max_user_watches
2279----------------
2280
2281Every epoll file descriptor can store a number of files to be monitored
2282for event readiness. Each one of these monitored files constitutes a "watch".
2283This configuration option sets the maximum number of "watches" that are
2284allowed for each user.
2285Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
2286on a 64bit one.
2287The current default value for max_user_watches is the 1/32 of the available
2288low memory, divided for the "watch" cost in bytes.
2289
2290
2291------------------------------------------------------------------------------
2292
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 9f8740ca3f3b..85354b32d731 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -12,6 +12,7 @@ that support it. For example, a given bus might look like this:
12 | |-- enable 12 | |-- enable
13 | |-- irq 13 | |-- irq
14 | |-- local_cpus 14 | |-- local_cpus
15 | |-- remove
15 | |-- resource 16 | |-- resource
16 | |-- resource0 17 | |-- resource0
17 | |-- resource1 18 | |-- resource1
@@ -36,6 +37,7 @@ files, each with their own function.
36 enable Whether the device is enabled (ascii, rw) 37 enable Whether the device is enabled (ascii, rw)
37 irq IRQ number (ascii, ro) 38 irq IRQ number (ascii, ro)
38 local_cpus nearby CPU mask (cpumask, ro) 39 local_cpus nearby CPU mask (cpumask, ro)
40 remove remove device from kernel's list (ascii, wo)
39 resource PCI resource host addresses (ascii, ro) 41 resource PCI resource host addresses (ascii, ro)
40 resource0..N PCI resource N, if present (binary, mmap) 42 resource0..N PCI resource N, if present (binary, mmap)
41 resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) 43 resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
@@ -46,6 +48,7 @@ files, each with their own function.
46 48
47 ro - read only file 49 ro - read only file
48 rw - file is readable and writable 50 rw - file is readable and writable
51 wo - write only file
49 mmap - file is mmapable 52 mmap - file is mmapable
50 ascii - file contains ascii text 53 ascii - file contains ascii text
51 binary - file contains binary data 54 binary - file contains binary data
@@ -69,10 +72,17 @@ The 'rom' file is special in that it provides read-only access to the device's
69ROM file, if available. It's disabled by default, however, so applications 72ROM file, if available. It's disabled by default, however, so applications
70should write the string "1" to the file to enable it before attempting a read 73should write the string "1" to the file to enable it before attempting a read
71call, and disable it following the access by writing "0" to the file. Note 74call, and disable it following the access by writing "0" to the file. Note
72that the device must be enabled for a rom read to return data succesfully. 75that the device must be enabled for a rom read to return data successfully.
73In the event a driver is not bound to the device, it can be enabled using the 76In the event a driver is not bound to the device, it can be enabled using the
74'enable' file, documented above. 77'enable' file, documented above.
75 78
79The 'remove' file is used to remove the PCI device, by writing a non-zero
80integer to the file. This does not involve any kind of hot-plug functionality,
81e.g. powering off the device. The device is removed from the kernel's list of
82PCI devices, the sysfs directory for it is removed, and the device will be
83removed from any drivers attached to it. Removal of PCI root buses is
84disallowed.
85
76Accessing legacy resources through sysfs 86Accessing legacy resources through sysfs
77---------------------------------------- 87----------------------------------------
78 88
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 222437efd75a..3015da0c6b2a 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -133,4 +133,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root.
133Author: 133Author:
134 Christoph Rohland <cr@sap.com>, 1.12.01 134 Christoph Rohland <cr@sap.com>, 1.12.01
135Updated: 135Updated:
136 Hugh Dickins <hugh@veritas.com>, 4 June 2007 136 Hugh Dickins, 4 June 2007
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt
index fde829a756e6..902b95d0ee51 100644
--- a/Documentation/filesystems/udf.txt
+++ b/Documentation/filesystems/udf.txt
@@ -24,6 +24,8 @@ The following mount options are supported:
24 24
25 gid= Set the default group. 25 gid= Set the default group.
26 umask= Set the default umask. 26 umask= Set the default umask.
27 mode= Set the default file permissions.
28 dmode= Set the default directory permissions.
27 uid= Set the default user. 29 uid= Set the default user.
28 bs= Set the block size. 30 bs= Set the block size.
29 unhide Show otherwise hidden files. 31 unhide Show otherwise hidden files.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 3a5ddc96901a..b58b84b50fa2 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -124,14 +124,19 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as
124flush -- If set, the filesystem will try to flush to disk more 124flush -- If set, the filesystem will try to flush to disk more
125 early than normal. Not set by default. 125 early than normal. Not set by default.
126 126
127rodir -- FAT has the ATTR_RO (read-only) attribute. But on Windows, 127rodir -- FAT has the ATTR_RO (read-only) attribute. On Windows,
128 the ATTR_RO of the directory will be just ignored actually, 128 the ATTR_RO of the directory will just be ignored,
129 and is used by only applications as flag. E.g. it's setted 129 and is used only by applications as a flag (e.g. it's set
130 for the customized folder. 130 for the customized folder).
131 131
132 If you want to use ATTR_RO as read-only flag even for 132 If you want to use ATTR_RO as read-only flag even for
133 the directory, set this option. 133 the directory, set this option.
134 134
135errors=panic|continue|remount-ro
136 -- specify FAT behavior on critical errors: panic, continue
137 without doing anything or remount the partition in
138 read-only mode (default behavior).
139
135<bool>: 0,1,yes,no,true,false 140<bool>: 0,1,yes,no,true,false
136 141
137TODO 142TODO
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index deeeed0faa8f..f49eecf2e573 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -277,8 +277,7 @@ or bottom half).
277 unfreeze_fs: called when VFS is unlocking a filesystem and making it writable 277 unfreeze_fs: called when VFS is unlocking a filesystem and making it writable
278 again. 278 again.
279 279
280 statfs: called when the VFS needs to get filesystem statistics. This 280 statfs: called when the VFS needs to get filesystem statistics.
281 is called with the kernel lock held
282 281
283 remount_fs: called when the filesystem is remounted. This is called 282 remount_fs: called when the filesystem is remounted. This is called
284 with the kernel lock held 283 with the kernel lock held
diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README
index c3480aa66ba8..7eceaff63f5f 100644
--- a/Documentation/firmware_class/README
+++ b/Documentation/firmware_class/README
@@ -77,7 +77,8 @@
77 seconds for the whole load operation. 77 seconds for the whole load operation.
78 78
79 - request_firmware_nowait() is also provided for convenience in 79 - request_firmware_nowait() is also provided for convenience in
80 non-user contexts. 80 user contexts to request firmware asynchronously, but can't be called
81 in atomic contexts.
81 82
82 83
83 about in-kernel persistence: 84 about in-kernel persistence:
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
deleted file mode 100644
index 803b1318b13d..000000000000
--- a/Documentation/ftrace.txt
+++ /dev/null
@@ -1,1424 +0,0 @@
1 ftrace - Function Tracer
2 ========================
3
4Copyright 2008 Red Hat Inc.
5 Author: Steven Rostedt <srostedt@redhat.com>
6 License: The GNU Free Documentation License, Version 1.2
7 (dual licensed under the GPL v2)
8Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton,
9 John Kacur, and David Teigland.
10
11Written for: 2.6.28-rc2
12
13Introduction
14------------
15
16Ftrace is an internal tracer designed to help out developers and
17designers of systems to find what is going on inside the kernel.
18It can be used for debugging or analyzing latencies and performance
19issues that take place outside of user-space.
20
21Although ftrace is the function tracer, it also includes an
22infrastructure that allows for other types of tracing. Some of the
23tracers that are currently in ftrace include a tracer to trace
24context switches, the time it takes for a high priority task to
25run after it was woken up, the time interrupts are disabled, and
26more (ftrace allows for tracer plugins, which means that the list of
27tracers can always grow).
28
29
30The File System
31---------------
32
33Ftrace uses the debugfs file system to hold the control files as well
34as the files to display output.
35
36To mount the debugfs system:
37
38 # mkdir /debug
39 # mount -t debugfs nodev /debug
40
41(Note: it is more common to mount at /sys/kernel/debug, but for simplicity
42 this document will use /debug)
43
44That's it! (assuming that you have ftrace configured into your kernel)
45
46After mounting the debugfs, you can see a directory called
47"tracing". This directory contains the control and output files
48of ftrace. Here is a list of some of the key files:
49
50
51 Note: all time values are in microseconds.
52
53 current_tracer: This is used to set or display the current tracer
54 that is configured.
55
56 available_tracers: This holds the different types of tracers that
57 have been compiled into the kernel. The tracers
58 listed here can be configured by echoing their name
59 into current_tracer.
60
61 tracing_enabled: This sets or displays whether the current_tracer
62 is activated and tracing or not. Echo 0 into this
63 file to disable the tracer or 1 to enable it.
64
65 trace: This file holds the output of the trace in a human readable
66 format (described below).
67
68 latency_trace: This file shows the same trace but the information
69 is organized more to display possible latencies
70 in the system (described below).
71
72 trace_pipe: The output is the same as the "trace" file but this
73 file is meant to be streamed with live tracing.
74 Reads from this file will block until new data
75 is retrieved. Unlike the "trace" and "latency_trace"
76 files, this file is a consumer. This means reading
77 from this file causes sequential reads to display
78 more current data. Once data is read from this
79 file, it is consumed, and will not be read
80 again with a sequential read. The "trace" and
81 "latency_trace" files are static, and if the
82 tracer is not adding more data, they will display
83 the same information every time they are read.
84
85 trace_options: This file lets the user control the amount of data
86 that is displayed in one of the above output
87 files.
88
89 trace_max_latency: Some of the tracers record the max latency.
90 For example, the time interrupts are disabled.
91 This time is saved in this file. The max trace
92 will also be stored, and displayed by either
93 "trace" or "latency_trace". A new max trace will
94 only be recorded if the latency is greater than
95 the value in this file. (in microseconds)
96
97 buffer_size_kb: This sets or displays the number of kilobytes each CPU
98 buffer can hold. The tracer buffers are the same size
99 for each CPU. The displayed number is the size of the
100 CPU buffer and not total size of all buffers. The
101 trace buffers are allocated in pages (blocks of memory
102 that the kernel uses for allocation, usually 4 KB in size).
103 If the last page allocated has room for more bytes
104 than requested, the rest of the page will be used,
105 making the actual allocation bigger than requested.
106 (Note, the size may not be a multiple of the page size due
107 to buffer managment overhead.)
108
109 This can only be updated when the current_tracer
110 is set to "nop".
111
112 tracing_cpumask: This is a mask that lets the user only trace
113 on specified CPUS. The format is a hex string
114 representing the CPUS.
115
116 set_ftrace_filter: When dynamic ftrace is configured in (see the
117 section below "dynamic ftrace"), the code is dynamically
118 modified (code text rewrite) to disable calling of the
119 function profiler (mcount). This lets tracing be configured
120 in with practically no overhead in performance. This also
121 has a side effect of enabling or disabling specific functions
122 to be traced. Echoing names of functions into this file
123 will limit the trace to only those functions.
124
125 set_ftrace_notrace: This has an effect opposite to that of
126 set_ftrace_filter. Any function that is added here will not
127 be traced. If a function exists in both set_ftrace_filter
128 and set_ftrace_notrace, the function will _not_ be traced.
129
130 set_ftrace_pid: Have the function tracer only trace a single thread.
131
132 available_filter_functions: This lists the functions that ftrace
133 has processed and can trace. These are the function
134 names that you can pass to "set_ftrace_filter" or
135 "set_ftrace_notrace". (See the section "dynamic ftrace"
136 below for more details.)
137
138
139The Tracers
140-----------
141
142Here is the list of current tracers that may be configured.
143
144 function - function tracer that uses mcount to trace all functions.
145
146 sched_switch - traces the context switches between tasks.
147
148 irqsoff - traces the areas that disable interrupts and saves
149 the trace with the longest max latency.
150 See tracing_max_latency. When a new max is recorded,
151 it replaces the old trace. It is best to view this
152 trace via the latency_trace file.
153
154 preemptoff - Similar to irqsoff but traces and records the amount of
155 time for which preemption is disabled.
156
157 preemptirqsoff - Similar to irqsoff and preemptoff, but traces and
158 records the largest time for which irqs and/or preemption
159 is disabled.
160
161 wakeup - Traces and records the max latency that it takes for
162 the highest priority task to get scheduled after
163 it has been woken up.
164
165 nop - This is not a tracer. To remove all tracers from tracing
166 simply echo "nop" into current_tracer.
167
168
169Examples of using the tracer
170----------------------------
171
172Here are typical examples of using the tracers when controlling them only
173with the debugfs interface (without using any user-land utilities).
174
175Output format:
176--------------
177
178Here is an example of the output format of the file "trace"
179
180 --------
181# tracer: function
182#
183# TASK-PID CPU# TIMESTAMP FUNCTION
184# | | | | |
185 bash-4251 [01] 10152.583854: path_put <-path_walk
186 bash-4251 [01] 10152.583855: dput <-path_put
187 bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput
188 --------
189
190A header is printed with the tracer name that is represented by the trace.
191In this case the tracer is "function". Then a header showing the format. Task
192name "bash", the task PID "4251", the CPU that it was running on
193"01", the timestamp in <secs>.<usecs> format, the function name that was
194traced "path_put" and the parent function that called this function
195"path_walk". The timestamp is the time at which the function was
196entered.
197
198The sched_switch tracer also includes tracing of task wakeups and
199context switches.
200
201 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
202 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
203 ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
204 events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
205 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
206 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
207
208Wake ups are represented by a "+" and the context switches are shown as
209"==>". The format is:
210
211 Context switches:
212
213 Previous task Next Task
214
215 <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
216
217 Wake ups:
218
219 Current task Task waking up
220
221 <pid>:<prio>:<state> + <pid>:<prio>:<state>
222
223The prio is the internal kernel priority, which is the inverse of the
224priority that is usually displayed by user-space tools. Zero represents
225the highest priority (99). Prio 100 starts the "nice" priorities with
226100 being equal to nice -20 and 139 being nice 19. The prio "140" is
227reserved for the idle task which is the lowest priority thread (pid 0).
228
229
230Latency trace format
231--------------------
232
233For traces that display latency times, the latency_trace file gives
234somewhat more information to see why a latency happened. Here is a typical
235trace.
236
237# tracer: irqsoff
238#
239irqsoff latency trace v1.1.5 on 2.6.26-rc8
240--------------------------------------------------------------------
241 latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
242 -----------------
243 | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
244 -----------------
245 => started at: apic_timer_interrupt
246 => ended at: do_softirq
247
248# _------=> CPU#
249# / _-----=> irqs-off
250# | / _----=> need-resched
251# || / _---=> hardirq/softirq
252# ||| / _--=> preempt-depth
253# |||| /
254# ||||| delay
255# cmd pid ||||| time | caller
256# \ / ||||| \ | /
257 <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt)
258 <idle>-0 0d.s. 97us : __do_softirq (do_softirq)
259 <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq)
260
261
262
263This shows that the current tracer is "irqsoff" tracing the time for which
264interrupts were disabled. It gives the trace version and the version
265of the kernel upon which this was executed on (2.6.26-rc8). Then it displays
266the max latency in microsecs (97 us). The number of trace entries displayed
267and the total number recorded (both are three: #3/3). The type of
268preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero
269and are reserved for later use. #P is the number of online CPUS (#P:2).
270
271The task is the process that was running when the latency occurred.
272(swapper pid: 0).
273
274The start and stop (the functions in which the interrupts were disabled and
275enabled respectively) that caused the latencies:
276
277 apic_timer_interrupt is where the interrupts were disabled.
278 do_softirq is where they were enabled again.
279
280The next lines after the header are the trace itself. The header
281explains which is which.
282
283 cmd: The name of the process in the trace.
284
285 pid: The PID of that process.
286
287 CPU#: The CPU which the process was running on.
288
289 irqs-off: 'd' interrupts are disabled. '.' otherwise.
290 Note: If the architecture does not support a way to
291 read the irq flags variable, an 'X' will always
292 be printed here.
293
294 need-resched: 'N' task need_resched is set, '.' otherwise.
295
296 hardirq/softirq:
297 'H' - hard irq occurred inside a softirq.
298 'h' - hard irq is running
299 's' - soft irq is running
300 '.' - normal context.
301
302 preempt-depth: The level of preempt_disabled
303
304The above is mostly meaningful for kernel developers.
305
306 time: This differs from the trace file output. The trace file output
307 includes an absolute timestamp. The timestamp used by the
308 latency_trace file is relative to the start of the trace.
309
310 delay: This is just to help catch your eye a bit better. And
311 needs to be fixed to be only relative to the same CPU.
312 The marks are determined by the difference between this
313 current trace and the next trace.
314 '!' - greater than preempt_mark_thresh (default 100)
315 '+' - greater than 1 microsecond
316 ' ' - less than or equal to 1 microsecond.
317
318 The rest is the same as the 'trace' file.
319
320
321trace_options
322-------------
323
324The trace_options file is used to control what gets printed in the trace
325output. To see what is available, simply cat the file:
326
327 cat /debug/tracing/trace_options
328 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
329 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
330
331To disable one of the options, echo in the option prepended with "no".
332
333 echo noprint-parent > /debug/tracing/trace_options
334
335To enable an option, leave off the "no".
336
337 echo sym-offset > /debug/tracing/trace_options
338
339Here are the available options:
340
341 print-parent - On function traces, display the calling function
342 as well as the function being traced.
343
344 print-parent:
345 bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul
346
347 noprint-parent:
348 bash-4000 [01] 1477.606694: simple_strtoul
349
350
351 sym-offset - Display not only the function name, but also the offset
352 in the function. For example, instead of seeing just
353 "ktime_get", you will see "ktime_get+0xb/0x20".
354
355 sym-offset:
356 bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0
357
358 sym-addr - this will also display the function address as well as
359 the function name.
360
361 sym-addr:
362 bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
363
364 verbose - This deals with the latency_trace file.
365
366 bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
367 (+0.000ms): simple_strtoul (strict_strtoul)
368
369 raw - This will display raw numbers. This option is best for use with
370 user applications that can translate the raw numbers better than
371 having it done in the kernel.
372
373 hex - Similar to raw, but the numbers will be in a hexadecimal format.
374
375 bin - This will print out the formats in raw binary.
376
377 block - TBD (needs update)
378
379 stacktrace - This is one of the options that changes the trace itself.
380 When a trace is recorded, so is the stack of functions.
381 This allows for back traces of trace sites.
382
383 userstacktrace - This option changes the trace.
384 It records a stacktrace of the current userspace thread.
385
386 sym-userobj - when user stacktrace are enabled, look up which object the
387 address belongs to, and print a relative address
388 This is especially useful when ASLR is on, otherwise you don't
389 get a chance to resolve the address to object/file/line after the app is no
390 longer running
391
392 The lookup is performed when you read trace,trace_pipe,latency_trace. Example:
393
394 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
395x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
396
397 sched-tree - TBD (any users??)
398
399
400sched_switch
401------------
402
403This tracer simply records schedule switches. Here is an example
404of how to use it.
405
406 # echo sched_switch > /debug/tracing/current_tracer
407 # echo 1 > /debug/tracing/tracing_enabled
408 # sleep 1
409 # echo 0 > /debug/tracing/tracing_enabled
410 # cat /debug/tracing/trace
411
412# tracer: sched_switch
413#
414# TASK-PID CPU# TIMESTAMP FUNCTION
415# | | | | |
416 bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
417 bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
418 sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
419 bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
420 bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
421 sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
422 bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
423 bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
424 <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
425 <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
426 ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
427 <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
428 <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
429 ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
430 sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
431 [...]
432
433
434As we have discussed previously about this format, the header shows
435the name of the trace and points to the options. The "FUNCTION"
436is a misnomer since here it represents the wake ups and context
437switches.
438
439The sched_switch file only lists the wake ups (represented with '+')
440and context switches ('==>') with the previous task or current task
441first followed by the next task or task waking up. The format for both
442of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO
443is the inverse of the actual priority with zero (0) being the highest
444priority and the nice values starting at 100 (nice -20). Below is
445a quick chart to map the kernel priority to user land priorities.
446
447 Kernel priority: 0 to 99 ==> user RT priority 99 to 0
448 Kernel priority: 100 to 139 ==> user nice -20 to 19
449 Kernel priority: 140 ==> idle task priority
450
451The task states are:
452
453 R - running : wants to run, may not actually be running
454 S - sleep : process is waiting to be woken up (handles signals)
455 D - disk sleep (uninterruptible sleep) : process must be woken up
456 (ignores signals)
457 T - stopped : process suspended
458 t - traced : process is being traced (with something like gdb)
459 Z - zombie : process waiting to be cleaned up
460 X - unknown
461
462
463ftrace_enabled
464--------------
465
466The following tracers (listed below) give different output depending
467on whether or not the sysctl ftrace_enabled is set. To set ftrace_enabled,
468one can either use the sysctl function or set it via the proc
469file system interface.
470
471 sysctl kernel.ftrace_enabled=1
472
473 or
474
475 echo 1 > /proc/sys/kernel/ftrace_enabled
476
477To disable ftrace_enabled simply replace the '1' with '0' in
478the above commands.
479
480When ftrace_enabled is set the tracers will also record the functions
481that are within the trace. The descriptions of the tracers
482will also show an example with ftrace enabled.
483
484
485irqsoff
486-------
487
488When interrupts are disabled, the CPU can not react to any other
489external event (besides NMIs and SMIs). This prevents the timer
490interrupt from triggering or the mouse interrupt from letting the
491kernel know of a new mouse event. The result is a latency with the
492reaction time.
493
494The irqsoff tracer tracks the time for which interrupts are disabled.
495When a new maximum latency is hit, the tracer saves the trace leading up
496to that latency point so that every time a new maximum is reached, the old
497saved trace is discarded and the new trace is saved.
498
499To reset the maximum, echo 0 into tracing_max_latency. Here is an
500example:
501
502 # echo irqsoff > /debug/tracing/current_tracer
503 # echo 0 > /debug/tracing/tracing_max_latency
504 # echo 1 > /debug/tracing/tracing_enabled
505 # ls -ltr
506 [...]
507 # echo 0 > /debug/tracing/tracing_enabled
508 # cat /debug/tracing/latency_trace
509# tracer: irqsoff
510#
511irqsoff latency trace v1.1.5 on 2.6.26
512--------------------------------------------------------------------
513 latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
514 -----------------
515 | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0)
516 -----------------
517 => started at: sys_setpgid
518 => ended at: sys_setpgid
519
520# _------=> CPU#
521# / _-----=> irqs-off
522# | / _----=> need-resched
523# || / _---=> hardirq/softirq
524# ||| / _--=> preempt-depth
525# |||| /
526# ||||| delay
527# cmd pid ||||| time | caller
528# \ / ||||| \ | /
529 bash-3730 1d... 0us : _write_lock_irq (sys_setpgid)
530 bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid)
531 bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid)
532
533
534Here we see that that we had a latency of 12 microsecs (which is
535very good). The _write_lock_irq in sys_setpgid disabled interrupts.
536The difference between the 12 and the displayed timestamp 14us occurred
537because the clock was incremented between the time of recording the max
538latency and the time of recording the function that had that latency.
539
540Note the above example had ftrace_enabled not set. If we set the
541ftrace_enabled, we get a much larger output:
542
543# tracer: irqsoff
544#
545irqsoff latency trace v1.1.5 on 2.6.26-rc8
546--------------------------------------------------------------------
547 latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
548 -----------------
549 | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0)
550 -----------------
551 => started at: __alloc_pages_internal
552 => ended at: __alloc_pages_internal
553
554# _------=> CPU#
555# / _-----=> irqs-off
556# | / _----=> need-resched
557# || / _---=> hardirq/softirq
558# ||| / _--=> preempt-depth
559# |||| /
560# ||||| delay
561# cmd pid ||||| time | caller
562# \ / ||||| \ | /
563 ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal)
564 ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist)
565 ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk)
566 ls-4339 0d..1 4us : add_preempt_count (_spin_lock)
567 ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk)
568 ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue)
569 ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest)
570 ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk)
571 ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue)
572 ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest)
573 ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk)
574 ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue)
575[...]
576 ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue)
577 ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest)
578 ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk)
579 ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue)
580 ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest)
581 ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk)
582 ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock)
583 ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal)
584 ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal)
585
586
587
588Here we traced a 50 microsecond latency. But we also see all the
589functions that were called during that time. Note that by enabling
590function tracing, we incur an added overhead. This overhead may
591extend the latency times. But nevertheless, this trace has provided
592some very helpful debugging information.
593
594
595preemptoff
596----------
597
598When preemption is disabled, we may be able to receive interrupts but
599the task cannot be preempted and a higher priority task must wait
600for preemption to be enabled again before it can preempt a lower
601priority task.
602
603The preemptoff tracer traces the places that disable preemption.
604Like the irqsoff tracer, it records the maximum latency for which preemption
605was disabled. The control of preemptoff tracer is much like the irqsoff
606tracer.
607
608 # echo preemptoff > /debug/tracing/current_tracer
609 # echo 0 > /debug/tracing/tracing_max_latency
610 # echo 1 > /debug/tracing/tracing_enabled
611 # ls -ltr
612 [...]
613 # echo 0 > /debug/tracing/tracing_enabled
614 # cat /debug/tracing/latency_trace
615# tracer: preemptoff
616#
617preemptoff latency trace v1.1.5 on 2.6.26-rc8
618--------------------------------------------------------------------
619 latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
620 -----------------
621 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
622 -----------------
623 => started at: do_IRQ
624 => ended at: __do_softirq
625
626# _------=> CPU#
627# / _-----=> irqs-off
628# | / _----=> need-resched
629# || / _---=> hardirq/softirq
630# ||| / _--=> preempt-depth
631# |||| /
632# ||||| delay
633# cmd pid ||||| time | caller
634# \ / ||||| \ | /
635 sshd-4261 0d.h. 0us+: irq_enter (do_IRQ)
636 sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq)
637 sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq)
638
639
640This has some more changes. Preemption was disabled when an interrupt
641came in (notice the 'h'), and was enabled while doing a softirq.
642(notice the 's'). But we also see that interrupts have been disabled
643when entering the preempt off section and leaving it (the 'd').
644We do not know if interrupts were enabled in the mean time.
645
646# tracer: preemptoff
647#
648preemptoff latency trace v1.1.5 on 2.6.26-rc8
649--------------------------------------------------------------------
650 latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
651 -----------------
652 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
653 -----------------
654 => started at: remove_wait_queue
655 => ended at: __do_softirq
656
657# _------=> CPU#
658# / _-----=> irqs-off
659# | / _----=> need-resched
660# || / _---=> hardirq/softirq
661# ||| / _--=> preempt-depth
662# |||| /
663# ||||| delay
664# cmd pid ||||| time | caller
665# \ / ||||| \ | /
666 sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue)
667 sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue)
668 sshd-4261 0d..1 2us : do_IRQ (common_interrupt)
669 sshd-4261 0d..1 2us : irq_enter (do_IRQ)
670 sshd-4261 0d..1 2us : idle_cpu (irq_enter)
671 sshd-4261 0d..1 3us : add_preempt_count (irq_enter)
672 sshd-4261 0d.h1 3us : idle_cpu (irq_enter)
673 sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ)
674[...]
675 sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock)
676 sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq)
677 sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq)
678 sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq)
679 sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock)
680 sshd-4261 0d.h1 14us : irq_exit (do_IRQ)
681 sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit)
682 sshd-4261 0d..2 15us : do_softirq (irq_exit)
683 sshd-4261 0d... 15us : __do_softirq (do_softirq)
684 sshd-4261 0d... 16us : __local_bh_disable (__do_softirq)
685 sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable)
686 sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable)
687 sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable)
688 sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable)
689[...]
690 sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable)
691 sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable)
692 sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable)
693 sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable)
694 sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip)
695 sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip)
696 sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable)
697 sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable)
698[...]
699 sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq)
700 sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq)
701
702
703The above is an example of the preemptoff trace with ftrace_enabled
704set. Here we see that interrupts were disabled the entire time.
705The irq_enter code lets us know that we entered an interrupt 'h'.
706Before that, the functions being traced still show that it is not
707in an interrupt, but we can see from the functions themselves that
708this is not the case.
709
710Notice that __do_softirq when called does not have a preempt_count.
711It may seem that we missed a preempt enabling. What really happened
712is that the preempt count is held on the thread's stack and we
713switched to the softirq stack (4K stacks in effect). The code
714does not copy the preempt count, but because interrupts are disabled,
715we do not need to worry about it. Having a tracer like this is good
716for letting people know what really happens inside the kernel.
717
718
719preemptirqsoff
720--------------
721
722Knowing the locations that have interrupts disabled or preemption
723disabled for the longest times is helpful. But sometimes we would
724like to know when either preemption and/or interrupts are disabled.
725
726Consider the following code:
727
728 local_irq_disable();
729 call_function_with_irqs_off();
730 preempt_disable();
731 call_function_with_irqs_and_preemption_off();
732 local_irq_enable();
733 call_function_with_preemption_off();
734 preempt_enable();
735
736The irqsoff tracer will record the total length of
737call_function_with_irqs_off() and
738call_function_with_irqs_and_preemption_off().
739
740The preemptoff tracer will record the total length of
741call_function_with_irqs_and_preemption_off() and
742call_function_with_preemption_off().
743
744But neither will trace the time that interrupts and/or preemption
745is disabled. This total time is the time that we can not schedule.
746To record this time, use the preemptirqsoff tracer.
747
748Again, using this trace is much like the irqsoff and preemptoff tracers.
749
750 # echo preemptirqsoff > /debug/tracing/current_tracer
751 # echo 0 > /debug/tracing/tracing_max_latency
752 # echo 1 > /debug/tracing/tracing_enabled
753 # ls -ltr
754 [...]
755 # echo 0 > /debug/tracing/tracing_enabled
756 # cat /debug/tracing/latency_trace
757# tracer: preemptirqsoff
758#
759preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
760--------------------------------------------------------------------
761 latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
762 -----------------
763 | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0)
764 -----------------
765 => started at: apic_timer_interrupt
766 => ended at: __do_softirq
767
768# _------=> CPU#
769# / _-----=> irqs-off
770# | / _----=> need-resched
771# || / _---=> hardirq/softirq
772# ||| / _--=> preempt-depth
773# |||| /
774# ||||| delay
775# cmd pid ||||| time | caller
776# \ / ||||| \ | /
777 ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt)
778 ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq)
779 ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq)
780
781
782
783The trace_hardirqs_off_thunk is called from assembly on x86 when
784interrupts are disabled in the assembly code. Without the function
785tracing, we do not know if interrupts were enabled within the preemption
786points. We do see that it started with preemption enabled.
787
788Here is a trace with ftrace_enabled set:
789
790
791# tracer: preemptirqsoff
792#
793preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
794--------------------------------------------------------------------
795 latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
796 -----------------
797 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
798 -----------------
799 => started at: write_chan
800 => ended at: __do_softirq
801
802# _------=> CPU#
803# / _-----=> irqs-off
804# | / _----=> need-resched
805# || / _---=> hardirq/softirq
806# ||| / _--=> preempt-depth
807# |||| /
808# ||||| delay
809# cmd pid ||||| time | caller
810# \ / ||||| \ | /
811 ls-4473 0.N.. 0us : preempt_schedule (write_chan)
812 ls-4473 0dN.1 1us : _spin_lock (schedule)
813 ls-4473 0dN.1 2us : add_preempt_count (_spin_lock)
814 ls-4473 0d..2 2us : put_prev_task_fair (schedule)
815[...]
816 ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts)
817 ls-4473 0d..2 13us : __switch_to (schedule)
818 sshd-4261 0d..2 14us : finish_task_switch (schedule)
819 sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch)
820 sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave)
821 sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set)
822 sshd-4261 0d..2 16us : do_IRQ (common_interrupt)
823 sshd-4261 0d..2 17us : irq_enter (do_IRQ)
824 sshd-4261 0d..2 17us : idle_cpu (irq_enter)
825 sshd-4261 0d..2 18us : add_preempt_count (irq_enter)
826 sshd-4261 0d.h2 18us : idle_cpu (irq_enter)
827 sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ)
828 sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq)
829 sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock)
830 sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq)
831 sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock)
832[...]
833 sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq)
834 sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock)
835 sshd-4261 0d.h2 29us : irq_exit (do_IRQ)
836 sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit)
837 sshd-4261 0d..3 30us : do_softirq (irq_exit)
838 sshd-4261 0d... 30us : __do_softirq (do_softirq)
839 sshd-4261 0d... 31us : __local_bh_disable (__do_softirq)
840 sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable)
841 sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable)
842[...]
843 sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip)
844 sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip)
845 sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt)
846 sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt)
847 sshd-4261 0d.s3 45us : idle_cpu (irq_enter)
848 sshd-4261 0d.s3 46us : add_preempt_count (irq_enter)
849 sshd-4261 0d.H3 46us : idle_cpu (irq_enter)
850 sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt)
851 sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt)
852[...]
853 sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt)
854 sshd-4261 0d.H3 82us : ktime_get (tick_program_event)
855 sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get)
856 sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts)
857 sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts)
858 sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event)
859 sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event)
860 sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt)
861 sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit)
862 sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit)
863 sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable)
864[...]
865 sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action)
866 sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq)
867 sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq)
868 sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq)
869 sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable)
870 sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq)
871 sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq)
872
873
874This is a very interesting trace. It started with the preemption of
875the ls task. We see that the task had the "need_resched" bit set
876via the 'N' in the trace. Interrupts were disabled before the spin_lock
877at the beginning of the trace. We see that a schedule took place to run
878sshd. When the interrupts were enabled, we took an interrupt.
879On return from the interrupt handler, the softirq ran. We took another
880interrupt while running the softirq as we see from the capital 'H'.
881
882
883wakeup
884------
885
886In a Real-Time environment it is very important to know the wakeup
887time it takes for the highest priority task that is woken up to the
888time that it executes. This is also known as "schedule latency".
889I stress the point that this is about RT tasks. It is also important
890to know the scheduling latency of non-RT tasks, but the average
891schedule latency is better for non-RT tasks. Tools like
892LatencyTop are more appropriate for such measurements.
893
894Real-Time environments are interested in the worst case latency.
895That is the longest latency it takes for something to happen, and
896not the average. We can have a very fast scheduler that may only
897have a large latency once in a while, but that would not work well
898with Real-Time tasks. The wakeup tracer was designed to record
899the worst case wakeups of RT tasks. Non-RT tasks are not recorded
900because the tracer only records one worst case and tracing non-RT
901tasks that are unpredictable will overwrite the worst case latency
902of RT tasks.
903
904Since this tracer only deals with RT tasks, we will run this slightly
905differently than we did with the previous tracers. Instead of performing
906an 'ls', we will run 'sleep 1' under 'chrt' which changes the
907priority of the task.
908
909 # echo wakeup > /debug/tracing/current_tracer
910 # echo 0 > /debug/tracing/tracing_max_latency
911 # echo 1 > /debug/tracing/tracing_enabled
912 # chrt -f 5 sleep 1
913 # echo 0 > /debug/tracing/tracing_enabled
914 # cat /debug/tracing/latency_trace
915# tracer: wakeup
916#
917wakeup latency trace v1.1.5 on 2.6.26-rc8
918--------------------------------------------------------------------
919 latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
920 -----------------
921 | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5)
922 -----------------
923
924# _------=> CPU#
925# / _-----=> irqs-off
926# | / _----=> need-resched
927# || / _---=> hardirq/softirq
928# ||| / _--=> preempt-depth
929# |||| /
930# ||||| delay
931# cmd pid ||||| time | caller
932# \ / ||||| \ | /
933 <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process)
934 <idle>-0 1d..4 4us : schedule (cpu_idle)
935
936
937
938Running this on an idle system, we see that it only took 4 microseconds
939to perform the task switch. Note, since the trace marker in the
940schedule is before the actual "switch", we stop the tracing when
941the recorded task is about to schedule in. This may change if
942we add a new marker at the end of the scheduler.
943
944Notice that the recorded task is 'sleep' with the PID of 4901 and it
945has an rt_prio of 5. This priority is user-space priority and not
946the internal kernel priority. The policy is 1 for SCHED_FIFO and 2
947for SCHED_RR.
948
949Doing the same with chrt -r 5 and ftrace_enabled set.
950
951# tracer: wakeup
952#
953wakeup latency trace v1.1.5 on 2.6.26-rc8
954--------------------------------------------------------------------
955 latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
956 -----------------
957 | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5)
958 -----------------
959
960# _------=> CPU#
961# / _-----=> irqs-off
962# | / _----=> need-resched
963# || / _---=> hardirq/softirq
964# ||| / _--=> preempt-depth
965# |||| /
966# ||||| delay
967# cmd pid ||||| time | caller
968# \ / ||||| \ | /
969ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process)
970ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb)
971ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up)
972ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup)
973ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr)
974ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup)
975ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up)
976ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up)
977[...]
978ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt)
979ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit)
980ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit)
981ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq)
982[...]
983ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks)
984ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq)
985ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable)
986ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd)
987ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd)
988ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched)
989ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched)
990ksoftirq-7 1.N.2 33us : schedule (__cond_resched)
991ksoftirq-7 1.N.2 33us : add_preempt_count (schedule)
992ksoftirq-7 1.N.3 34us : hrtick_clear (schedule)
993ksoftirq-7 1dN.3 35us : _spin_lock (schedule)
994ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock)
995ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule)
996ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair)
997[...]
998ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline)
999ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock)
1000ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline)
1001ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock)
1002ksoftirq-7 1d..4 50us : schedule (__cond_resched)
1003
1004The interrupt went off while running ksoftirqd. This task runs at
1005SCHED_OTHER. Why did not we see the 'N' set early? This may be
1006a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K stacks
1007configured, the interrupt and softirq run with their own stack.
1008Some information is held on the top of the task's stack (need_resched
1009and preempt_count are both stored there). The setting of the NEED_RESCHED
1010bit is done directly to the task's stack, but the reading of the
1011NEED_RESCHED is done by looking at the current stack, which in this case
1012is the stack for the hard interrupt. This hides the fact that NEED_RESCHED
1013has been set. We do not see the 'N' until we switch back to the task's
1014assigned stack.
1015
1016function
1017--------
1018
1019This tracer is the function tracer. Enabling the function tracer
1020can be done from the debug file system. Make sure the ftrace_enabled is
1021set; otherwise this tracer is a nop.
1022
1023 # sysctl kernel.ftrace_enabled=1
1024 # echo function > /debug/tracing/current_tracer
1025 # echo 1 > /debug/tracing/tracing_enabled
1026 # usleep 1
1027 # echo 0 > /debug/tracing/tracing_enabled
1028 # cat /debug/tracing/trace
1029# tracer: function
1030#
1031# TASK-PID CPU# TIMESTAMP FUNCTION
1032# | | | | |
1033 bash-4003 [00] 123.638713: finish_task_switch <-schedule
1034 bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch
1035 bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq
1036 bash-4003 [00] 123.638715: hrtick_set <-schedule
1037 bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set
1038 bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave
1039 bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set
1040 bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore
1041 bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set
1042 bash-4003 [00] 123.638718: sub_preempt_count <-schedule
1043 bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule
1044 bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run
1045 bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion
1046 bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common
1047 bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq
1048[...]
1049
1050
1051Note: function tracer uses ring buffers to store the above entries.
1052The newest data may overwrite the oldest data. Sometimes using echo to
1053stop the trace is not sufficient because the tracing could have overwritten
1054the data that you wanted to record. For this reason, it is sometimes better to
1055disable tracing directly from a program. This allows you to stop the
1056tracing at the point that you hit the part that you are interested in.
1057To disable the tracing directly from a C program, something like following
1058code snippet can be used:
1059
1060int trace_fd;
1061[...]
1062int main(int argc, char *argv[]) {
1063 [...]
1064 trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY);
1065 [...]
1066 if (condition_hit()) {
1067 write(trace_fd, "0", 1);
1068 }
1069 [...]
1070}
1071
1072Note: Here we hard coded the path name. The debugfs mount is not
1073guaranteed to be at /debug (and is more commonly at /sys/kernel/debug).
1074For simple one time traces, the above is sufficent. For anything else,
1075a search through /proc/mounts may be needed to find where the debugfs
1076file-system is mounted.
1077
1078
1079Single thread tracing
1080---------------------
1081
1082By writing into /debug/tracing/set_ftrace_pid you can trace a
1083single thread. For example:
1084
1085# cat /debug/tracing/set_ftrace_pid
1086no pid
1087# echo 3111 > /debug/tracing/set_ftrace_pid
1088# cat /debug/tracing/set_ftrace_pid
10893111
1090# echo function > /debug/tracing/current_tracer
1091# cat /debug/tracing/trace | head
1092 # tracer: function
1093 #
1094 # TASK-PID CPU# TIMESTAMP FUNCTION
1095 # | | | | |
1096 yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
1097 yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
1098 yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
1099 yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
1100 yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
1101 yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
1102# echo -1 > /debug/tracing/set_ftrace_pid
1103# cat /debug/tracing/trace |head
1104 # tracer: function
1105 #
1106 # TASK-PID CPU# TIMESTAMP FUNCTION
1107 # | | | | |
1108 ##### CPU 3 buffer started ####
1109 yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
1110 yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
1111 yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
1112 yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
1113 yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
1114
1115If you want to trace a function when executing, you could use
1116something like this simple program:
1117
1118#include <stdio.h>
1119#include <stdlib.h>
1120#include <sys/types.h>
1121#include <sys/stat.h>
1122#include <fcntl.h>
1123#include <unistd.h>
1124
1125int main (int argc, char **argv)
1126{
1127 if (argc < 1)
1128 exit(-1);
1129
1130 if (fork() > 0) {
1131 int fd, ffd;
1132 char line[64];
1133 int s;
1134
1135 ffd = open("/debug/tracing/current_tracer", O_WRONLY);
1136 if (ffd < 0)
1137 exit(-1);
1138 write(ffd, "nop", 3);
1139
1140 fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY);
1141 s = sprintf(line, "%d\n", getpid());
1142 write(fd, line, s);
1143
1144 write(ffd, "function", 8);
1145
1146 close(fd);
1147 close(ffd);
1148
1149 execvp(argv[1], argv+1);
1150 }
1151
1152 return 0;
1153}
1154
1155dynamic ftrace
1156--------------
1157
1158If CONFIG_DYNAMIC_FTRACE is set, the system will run with
1159virtually no overhead when function tracing is disabled. The way
1160this works is the mcount function call (placed at the start of
1161every kernel function, produced by the -pg switch in gcc), starts
1162of pointing to a simple return. (Enabling FTRACE will include the
1163-pg switch in the compiling of the kernel.)
1164
1165At compile time every C file object is run through the
1166recordmcount.pl script (located in the scripts directory). This
1167script will process the C object using objdump to find all the
1168locations in the .text section that call mcount. (Note, only
1169the .text section is processed, since processing other sections
1170like .init.text may cause races due to those sections being freed).
1171
1172A new section called "__mcount_loc" is created that holds references
1173to all the mcount call sites in the .text section. This section is
1174compiled back into the original object. The final linker will add
1175all these references into a single table.
1176
1177On boot up, before SMP is initialized, the dynamic ftrace code
1178scans this table and updates all the locations into nops. It also
1179records the locations, which are added to the available_filter_functions
1180list. Modules are processed as they are loaded and before they are
1181executed. When a module is unloaded, it also removes its functions from
1182the ftrace function list. This is automatic in the module unload
1183code, and the module author does not need to worry about it.
1184
1185When tracing is enabled, kstop_machine is called to prevent races
1186with the CPUS executing code being modified (which can cause the
1187CPU to do undesireable things), and the nops are patched back
1188to calls. But this time, they do not call mcount (which is just
1189a function stub). They now call into the ftrace infrastructure.
1190
1191One special side-effect to the recording of the functions being
1192traced is that we can now selectively choose which functions we
1193wish to trace and which ones we want the mcount calls to remain as
1194nops.
1195
1196Two files are used, one for enabling and one for disabling the tracing
1197of specified functions. They are:
1198
1199 set_ftrace_filter
1200
1201and
1202
1203 set_ftrace_notrace
1204
1205A list of available functions that you can add to these files is listed
1206in:
1207
1208 available_filter_functions
1209
1210 # cat /debug/tracing/available_filter_functions
1211put_prev_task_idle
1212kmem_cache_create
1213pick_next_task_rt
1214get_online_cpus
1215pick_next_task_fair
1216mutex_lock
1217[...]
1218
1219If I am only interested in sys_nanosleep and hrtimer_interrupt:
1220
1221 # echo sys_nanosleep hrtimer_interrupt \
1222 > /debug/tracing/set_ftrace_filter
1223 # echo ftrace > /debug/tracing/current_tracer
1224 # echo 1 > /debug/tracing/tracing_enabled
1225 # usleep 1
1226 # echo 0 > /debug/tracing/tracing_enabled
1227 # cat /debug/tracing/trace
1228# tracer: ftrace
1229#
1230# TASK-PID CPU# TIMESTAMP FUNCTION
1231# | | | | |
1232 usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt
1233 usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call
1234 <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt
1235
1236To see which functions are being traced, you can cat the file:
1237
1238 # cat /debug/tracing/set_ftrace_filter
1239hrtimer_interrupt
1240sys_nanosleep
1241
1242
1243Perhaps this is not enough. The filters also allow simple wild cards.
1244Only the following are currently available
1245
1246 <match>* - will match functions that begin with <match>
1247 *<match> - will match functions that end with <match>
1248 *<match>* - will match functions that have <match> in it
1249
1250These are the only wild cards which are supported.
1251
1252 <match>*<match> will not work.
1253
1254Note: It is better to use quotes to enclose the wild cards, otherwise
1255 the shell may expand the parameters into names of files in the local
1256 directory.
1257
1258 # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
1259
1260Produces:
1261
1262# tracer: ftrace
1263#
1264# TASK-PID CPU# TIMESTAMP FUNCTION
1265# | | | | |
1266 bash-4003 [00] 1480.611794: hrtimer_init <-copy_process
1267 bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set
1268 bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear
1269 bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel
1270 <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt
1271 <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt
1272 <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt
1273 <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt
1274 <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt
1275
1276
1277Notice that we lost the sys_nanosleep.
1278
1279 # cat /debug/tracing/set_ftrace_filter
1280hrtimer_run_queues
1281hrtimer_run_pending
1282hrtimer_init
1283hrtimer_cancel
1284hrtimer_try_to_cancel
1285hrtimer_forward
1286hrtimer_start
1287hrtimer_reprogram
1288hrtimer_force_reprogram
1289hrtimer_get_next_event
1290hrtimer_interrupt
1291hrtimer_nanosleep
1292hrtimer_wakeup
1293hrtimer_get_remaining
1294hrtimer_get_res
1295hrtimer_init_sleeper
1296
1297
1298This is because the '>' and '>>' act just like they do in bash.
1299To rewrite the filters, use '>'
1300To append to the filters, use '>>'
1301
1302To clear out a filter so that all functions will be recorded again:
1303
1304 # echo > /debug/tracing/set_ftrace_filter
1305 # cat /debug/tracing/set_ftrace_filter
1306 #
1307
1308Again, now we want to append.
1309
1310 # echo sys_nanosleep > /debug/tracing/set_ftrace_filter
1311 # cat /debug/tracing/set_ftrace_filter
1312sys_nanosleep
1313 # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter
1314 # cat /debug/tracing/set_ftrace_filter
1315hrtimer_run_queues
1316hrtimer_run_pending
1317hrtimer_init
1318hrtimer_cancel
1319hrtimer_try_to_cancel
1320hrtimer_forward
1321hrtimer_start
1322hrtimer_reprogram
1323hrtimer_force_reprogram
1324hrtimer_get_next_event
1325hrtimer_interrupt
1326sys_nanosleep
1327hrtimer_nanosleep
1328hrtimer_wakeup
1329hrtimer_get_remaining
1330hrtimer_get_res
1331hrtimer_init_sleeper
1332
1333
1334The set_ftrace_notrace prevents those functions from being traced.
1335
1336 # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace
1337
1338Produces:
1339
1340# tracer: ftrace
1341#
1342# TASK-PID CPU# TIMESTAMP FUNCTION
1343# | | | | |
1344 bash-4043 [01] 115.281644: finish_task_switch <-schedule
1345 bash-4043 [01] 115.281645: hrtick_set <-schedule
1346 bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set
1347 bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run
1348 bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion
1349 bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run
1350 bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop
1351 bash-4043 [01] 115.281648: wake_up_process <-kthread_stop
1352 bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process
1353
1354We can see that there's no more lock or preempt tracing.
1355
1356trace_pipe
1357----------
1358
1359The trace_pipe outputs the same content as the trace file, but the effect
1360on the tracing is different. Every read from trace_pipe is consumed.
1361This means that subsequent reads will be different. The trace
1362is live.
1363
1364 # echo function > /debug/tracing/current_tracer
1365 # cat /debug/tracing/trace_pipe > /tmp/trace.out &
1366[1] 4153
1367 # echo 1 > /debug/tracing/tracing_enabled
1368 # usleep 1
1369 # echo 0 > /debug/tracing/tracing_enabled
1370 # cat /debug/tracing/trace
1371# tracer: function
1372#
1373# TASK-PID CPU# TIMESTAMP FUNCTION
1374# | | | | |
1375
1376 #
1377 # cat /tmp/trace.out
1378 bash-4043 [00] 41.267106: finish_task_switch <-schedule
1379 bash-4043 [00] 41.267106: hrtick_set <-schedule
1380 bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set
1381 bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run
1382 bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion
1383 bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run
1384 bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop
1385 bash-4043 [00] 41.267110: wake_up_process <-kthread_stop
1386 bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process
1387 bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up
1388
1389
1390Note, reading the trace_pipe file will block until more input is added.
1391By changing the tracer, trace_pipe will issue an EOF. We needed
1392to set the function tracer _before_ we "cat" the trace_pipe file.
1393
1394
1395trace entries
1396-------------
1397
1398Having too much or not enough data can be troublesome in diagnosing
1399an issue in the kernel. The file buffer_size_kb is used to modify
1400the size of the internal trace buffers. The number listed
1401is the number of entries that can be recorded per CPU. To know
1402the full size, multiply the number of possible CPUS with the
1403number of entries.
1404
1405 # cat /debug/tracing/buffer_size_kb
14061408 (units kilobytes)
1407
1408Note, to modify this, you must have tracing completely disabled. To do that,
1409echo "nop" into the current_tracer. If the current_tracer is not set
1410to "nop", an EINVAL error will be returned.
1411
1412 # echo nop > /debug/tracing/current_tracer
1413 # echo 10000 > /debug/tracing/buffer_size_kb
1414 # cat /debug/tracing/buffer_size_kb
141510000 (units kilobytes)
1416
1417The number of pages which will be allocated is limited to a percentage
1418of available memory. Allocating too much will produce an error.
1419
1420 # echo 1000000000000 > /debug/tracing/buffer_size_kb
1421-bash: echo: write error: Cannot allocate memory
1422 # cat /debug/tracing/buffer_size_kb
142385
1424
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 000000000000..9dc1ff4fd536
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
1Futex Requeue PI
2----------------
3
4Requeueing of tasks from a non-PI futex to a PI futex requires
5special handling in order to ensure the underlying rt_mutex is never
6left without an owner if it has waiters; doing so would break the PI
7boosting logic [see rt-mutex-desgin.txt] For the purposes of
8brevity, this action will be referred to as "requeue_pi" throughout
9this document. Priority inheritance is abbreviated throughout as
10"PI".
11
12Motivation
13----------
14
15Without requeue_pi, the glibc implementation of
16pthread_cond_broadcast() must resort to waking all the tasks waiting
17on a pthread_condvar and letting them try to sort out which task
18gets to run first in classic thundering-herd formation. An ideal
19implementation would wake the highest-priority waiter, and leave the
20rest to the natural wakeup inherent in unlocking the mutex
21associated with the condvar.
22
23Consider the simplified glibc calls:
24
25/* caller must lock mutex */
26pthread_cond_wait(cond, mutex)
27{
28 lock(cond->__data.__lock);
29 unlock(mutex);
30 do {
31 unlock(cond->__data.__lock);
32 futex_wait(cond->__data.__futex);
33 lock(cond->__data.__lock);
34 } while(...)
35 unlock(cond->__data.__lock);
36 lock(mutex);
37}
38
39pthread_cond_broadcast(cond)
40{
41 lock(cond->__data.__lock);
42 unlock(cond->__data.__lock);
43 futex_requeue(cond->data.__futex, cond->mutex);
44}
45
46Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
47has waiters. Note that pthread_cond_wait() attempts to lock the
48mutex only after it has returned to user space. This will leave the
49underlying rt_mutex with waiters, and no owner, breaking the
50previously mentioned PI-boosting algorithms.
51
52In order to support PI-aware pthread_condvar's, the kernel needs to
53be able to requeue tasks to PI futexes. This support implies that
54upon a successful futex_wait system call, the caller would return to
55user space already holding the PI futex. The glibc implementation
56would be modified as follows:
57
58
59/* caller must lock mutex */
60pthread_cond_wait_pi(cond, mutex)
61{
62 lock(cond->__data.__lock);
63 unlock(mutex);
64 do {
65 unlock(cond->__data.__lock);
66 futex_wait_requeue_pi(cond->__data.__futex);
67 lock(cond->__data.__lock);
68 } while(...)
69 unlock(cond->__data.__lock);
70 /* the kernel acquired the the mutex for us */
71}
72
73pthread_cond_broadcast_pi(cond)
74{
75 lock(cond->__data.__lock);
76 unlock(cond->__data.__lock);
77 futex_requeue_pi(cond->data.__futex, cond->mutex);
78}
79
80The actual glibc implementation will likely test for PI and make the
81necessary changes inside the existing calls rather than creating new
82calls for the PI cases. Similar changes are needed for
83pthread_cond_timedwait() and pthread_cond_signal().
84
85Implementation
86--------------
87
88In order to ensure the rt_mutex has an owner if it has waiters, it
89is necessary for both the requeue code, as well as the waiting code,
90to be able to acquire the rt_mutex before returning to user space.
91The requeue code cannot simply wake the waiter and leave it to
92acquire the rt_mutex as it would open a race window between the
93requeue call returning to user space and the waiter waking and
94starting to run. This is especially true in the uncontended case.
95
96The solution involves two new rt_mutex helper routines,
97rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
98allow the requeue code to acquire an uncontended rt_mutex on behalf
99of the waiter and to enqueue the waiter on a contended rt_mutex.
100Two new system calls provide the kernel<->user interface to
101requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
102
103FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
104and pthread_cond_timedwait()) to block on the initial futex and wait
105to be requeued to a PI-aware futex. The implementation is the
106result of a high-speed collision between futex_wait() and
107futex_lock_pi(), with some extra logic to check for the additional
108wake-up scenarios.
109
110FUTEX_REQUEUE_CMP_PI is called by the waker
111(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
112possibly wake the waiting tasks. Internally, this system call is
113still handled by futex_requeue (by passing requeue_pi=1). Before
114requeueing, futex_requeue() attempts to acquire the requeue target
115PI futex on behalf of the top waiter. If it can, this waiter is
116woken. futex_requeue() then proceeds to requeue the remaining
117nr_wake+nr_requeue tasks to the PI futex, calling
118rt_mutex_start_proxy_lock() prior to each requeue to prepare the
119task as a waiter on the underlying rt_mutex. It is possible that
120the lock can be acquired at this stage as well, if so, the next
121waiter is woken to finish the acquisition of the lock.
122
123FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
124their sum is all that really matters. futex_requeue() will wake or
125requeue up to nr_wake + nr_requeue tasks. It will wake only as many
126tasks as it can acquire the lock for, which in the majority of cases
127should be 0 as good programming practice dictates that the caller of
128either pthread_cond_broadcast() or pthread_cond_signal() acquire the
129mutex prior to making the call. FUTEX_REQUEUE_PI requires that
130nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
131signal.
diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
new file mode 100644
index 000000000000..e716aadb3a33
--- /dev/null
+++ b/Documentation/gcov.txt
@@ -0,0 +1,246 @@
1Using gcov with the Linux kernel
2================================
3
41. Introduction
52. Preparation
63. Customization
74. Files
85. Modules
96. Separated build and test machines
107. Troubleshooting
11Appendix A: sample script: gather_on_build.sh
12Appendix B: sample script: gather_on_test.sh
13
14
151. Introduction
16===============
17
18gcov profiling kernel support enables the use of GCC's coverage testing
19tool gcov [1] with the Linux kernel. Coverage data of a running kernel
20is exported in gcov-compatible format via the "gcov" debugfs directory.
21To get coverage data for a specific file, change to the kernel build
22directory and use gcov with the -o option as follows (requires root):
23
24# cd /tmp/linux-out
25# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
26
27This will create source code files annotated with execution counts
28in the current directory. In addition, graphical gcov front-ends such
29as lcov [2] can be used to automate the process of collecting data
30for the entire kernel and provide coverage overviews in HTML format.
31
32Possible uses:
33
34* debugging (has this line been reached at all?)
35* test improvement (how do I change my test to cover these lines?)
36* minimizing kernel configurations (do I need this option if the
37 associated code is never run?)
38
39--
40
41[1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
42[2] http://ltp.sourceforge.net/coverage/lcov.php
43
44
452. Preparation
46==============
47
48Configure the kernel with:
49
50 CONFIG_DEBUGFS=y
51 CONFIG_GCOV_KERNEL=y
52
53and to get coverage data for the entire kernel:
54
55 CONFIG_GCOV_PROFILE_ALL=y
56
57Note that kernels compiled with profiling flags will be significantly
58larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported
59on all architectures.
60
61Profiling data will only become accessible once debugfs has been
62mounted:
63
64 mount -t debugfs none /sys/kernel/debug
65
66
673. Customization
68================
69
70To enable profiling for specific files or directories, add a line
71similar to the following to the respective kernel Makefile:
72
73 For a single file (e.g. main.o):
74 GCOV_PROFILE_main.o := y
75
76 For all files in one directory:
77 GCOV_PROFILE := y
78
79To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
80is specified, use:
81
82 GCOV_PROFILE_main.o := n
83 and:
84 GCOV_PROFILE := n
85
86Only files which are linked to the main kernel image or are compiled as
87kernel modules are supported by this mechanism.
88
89
904. Files
91========
92
93The gcov kernel support creates the following files in debugfs:
94
95 /sys/kernel/debug/gcov
96 Parent directory for all gcov-related files.
97
98 /sys/kernel/debug/gcov/reset
99 Global reset file: resets all coverage data to zero when
100 written to.
101
102 /sys/kernel/debug/gcov/path/to/compile/dir/file.gcda
103 The actual gcov data file as understood by the gcov
104 tool. Resets file coverage data to zero when written to.
105
106 /sys/kernel/debug/gcov/path/to/compile/dir/file.gcno
107 Symbolic link to a static data file required by the gcov
108 tool. This file is generated by gcc when compiling with
109 option -ftest-coverage.
110
111
1125. Modules
113==========
114
115Kernel modules may contain cleanup code which is only run during
116module unload time. The gcov mechanism provides a means to collect
117coverage data for such code by keeping a copy of the data associated
118with the unloaded module. This data remains available through debugfs.
119Once the module is loaded again, the associated coverage counters are
120initialized with the data from its previous instantiation.
121
122This behavior can be deactivated by specifying the gcov_persist kernel
123parameter:
124
125 gcov_persist=0
126
127At run-time, a user can also choose to discard data for an unloaded
128module by writing to its data file or the global reset file.
129
130
1316. Separated build and test machines
132====================================
133
134The gcov kernel profiling infrastructure is designed to work out-of-the
135box for setups where kernels are built and run on the same machine. In
136cases where the kernel runs on a separate machine, special preparations
137must be made, depending on where the gcov tool is used:
138
139a) gcov is run on the TEST machine
140
141The gcov tool version on the test machine must be compatible with the
142gcc version used for kernel build. Also the following files need to be
143copied from build to test machine:
144
145from the source tree:
146 - all C source files + headers
147
148from the build tree:
149 - all C source files + headers
150 - all .gcda and .gcno files
151 - all links to directories
152
153It is important to note that these files need to be placed into the
154exact same file system location on the test machine as on the build
155machine. If any of the path components is symbolic link, the actual
156directory needs to be used instead (due to make's CURDIR handling).
157
158b) gcov is run on the BUILD machine
159
160The following files need to be copied after each test case from test
161to build machine:
162
163from the gcov directory in sysfs:
164 - all .gcda files
165 - all links to .gcno files
166
167These files can be copied to any location on the build machine. gcov
168must then be called with the -o option pointing to that directory.
169
170Example directory setup on the build machine:
171
172 /tmp/linux: kernel source tree
173 /tmp/out: kernel build directory as specified by make O=
174 /tmp/coverage: location of the files copied from the test machine
175
176 [user@build] cd /tmp/out
177 [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
178
179
1807. Troubleshooting
181==================
182
183Problem: Compilation aborts during linker step.
184Cause: Profiling flags are specified for source files which are not
185 linked to the main kernel or which are linked by a custom
186 linker procedure.
187Solution: Exclude affected source files from profiling by specifying
188 GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the
189 corresponding Makefile.
190
191
192Appendix A: gather_on_build.sh
193==============================
194
195Sample script to gather coverage meta files on the build machine
196(see 6a):
197
198#!/bin/bash
199
200KSRC=$1
201KOBJ=$2
202DEST=$3
203
204if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then
205 echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2
206 exit 1
207fi
208
209KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
210KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
211
212find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \
213 -perm /u+r,g+r | tar cfz $DEST -P -T -
214
215if [ $? -eq 0 ] ; then
216 echo "$DEST successfully created, copy to test system and unpack with:"
217 echo " tar xfz $DEST -P"
218else
219 echo "Could not create file $DEST"
220fi
221
222
223Appendix B: gather_on_test.sh
224=============================
225
226Sample script to gather coverage data files on the test machine
227(see 6b):
228
229#!/bin/bash
230
231DEST=$1
232GCDA=/sys/kernel/debug/gcov
233
234if [ -z "$DEST" ] ; then
235 echo "Usage: $0 <output.tar.gz>" >&2
236 exit 1
237fi
238
239find $GCDA -name '*.gcno' -o -name '*.gcda' | tar cfz $DEST -T -
240
241if [ $? -eq 0 ] ; then
242 echo "$DEST successfully created, copy to build system and unpack with:"
243 echo " tar xfz $DEST"
244else
245 echo "Could not create file $DEST"
246fi
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index b1b988701247..e4b6985044a2 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -123,7 +123,10 @@ platform-specific implementation issue.
123 123
124Using GPIOs 124Using GPIOs
125----------- 125-----------
126One of the first things to do with a GPIO, often in board setup code when 126The first thing a system should do with a GPIO is allocate it, using
127the gpio_request() call; see later.
128
129One of the next things to do with a GPIO, often in board setup code when
127setting up a platform_device using the GPIO, is mark its direction: 130setting up a platform_device using the GPIO, is mark its direction:
128 131
129 /* set as input or output, returning 0 or negative errno */ 132 /* set as input or output, returning 0 or negative errno */
@@ -141,8 +144,8 @@ This helps avoid signal glitching during system startup.
141 144
142For compatibility with legacy interfaces to GPIOs, setting the direction 145For compatibility with legacy interfaces to GPIOs, setting the direction
143of a GPIO implicitly requests that GPIO (see below) if it has not been 146of a GPIO implicitly requests that GPIO (see below) if it has not been
144requested already. That compatibility may be removed in the future; 147requested already. That compatibility is being removed from the optional
145explicitly requesting GPIOs is strongly preferred. 148gpiolib framework.
146 149
147Setting the direction can fail if the GPIO number is invalid, or when 150Setting the direction can fail if the GPIO number is invalid, or when
148that particular GPIO can't be used in that mode. It's generally a bad 151that particular GPIO can't be used in that mode. It's generally a bad
@@ -195,7 +198,7 @@ This requires sleeping, which can't be done from inside IRQ handlers.
195 198
196Platforms that support this type of GPIO distinguish them from other GPIOs 199Platforms that support this type of GPIO distinguish them from other GPIOs
197by returning nonzero from this call (which requires a valid GPIO number, 200by returning nonzero from this call (which requires a valid GPIO number,
198either explicitly or implicitly requested): 201which should have been previously allocated with gpio_request):
199 202
200 int gpio_cansleep(unsigned gpio); 203 int gpio_cansleep(unsigned gpio);
201 204
@@ -212,10 +215,9 @@ for GPIOs that can't be accessed from IRQ handlers, these calls act the
212same as the spinlock-safe calls. 215same as the spinlock-safe calls.
213 216
214 217
215Claiming and Releasing GPIOs (OPTIONAL) 218Claiming and Releasing GPIOs
216--------------------------------------- 219----------------------------
217To help catch system configuration errors, two calls are defined. 220To help catch system configuration errors, two calls are defined.
218However, many platforms don't currently support this mechanism.
219 221
220 /* request GPIO, returning 0 or negative errno. 222 /* request GPIO, returning 0 or negative errno.
221 * non-null labels may be useful for diagnostics. 223 * non-null labels may be useful for diagnostics.
@@ -244,13 +246,6 @@ Some platforms may also use knowledge about what GPIOs are active for
244power management, such as by powering down unused chip sectors and, more 246power management, such as by powering down unused chip sectors and, more
245easily, gating off unused clocks. 247easily, gating off unused clocks.
246 248
247These two calls are optional because not not all current Linux platforms
248offer such functionality in their GPIO support; a valid implementation
249could return success for all gpio_request() calls. Unlike the other calls,
250the state they represent doesn't normally match anything from a hardware
251register; it's just a software bitmap which clearly is not necessary for
252correct operation of hardware or (bug free) drivers.
253
254Note that requesting a GPIO does NOT cause it to be configured in any 249Note that requesting a GPIO does NOT cause it to be configured in any
255way; it just marks that GPIO as in use. Separate code must handle any 250way; it just marks that GPIO as in use. Separate code must handle any
256pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). 251pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
@@ -463,7 +458,7 @@ debugfs interface, since it provides control over GPIO direction and
463value instead of just showing a gpio state summary. Plus, it could be 458value instead of just showing a gpio state summary. Plus, it could be
464present on production systems without debugging support. 459present on production systems without debugging support.
465 460
466Given approprate hardware documentation for the system, userspace could 461Given appropriate hardware documentation for the system, userspace could
467know for example that GPIO #23 controls the write protect line used to 462know for example that GPIO #23 controls the write protect line used to
468protect boot loader segments in flash memory. System upgrade procedures 463protect boot loader segments in flash memory. System upgrade procedures
469may need to temporarily remove that protection, first importing a GPIO, 464may need to temporarily remove that protection, first importing a GPIO,
diff --git a/Documentation/hwmon/ds1621 b/Documentation/hwmon/ds1621
index 1fee6f1e6bc5..5e97f333c4df 100644
--- a/Documentation/hwmon/ds1621
+++ b/Documentation/hwmon/ds1621
@@ -49,12 +49,9 @@ of up to +/- 0.5 degrees even when compared against precise temperature
49readings. Be sure to have a high vs. low temperature limit gap of al least 49readings. Be sure to have a high vs. low temperature limit gap of al least
501.0 degree Celsius to avoid Tout "bouncing", though! 501.0 degree Celsius to avoid Tout "bouncing", though!
51 51
52As for alarms, you can read the alarm status of the DS1621 via the 'alarms' 52The alarm bits are set when the high or low limits are met or exceeded and
53/sys file interface. The result consists mainly of bit 6 and 5 of the 53are reset by the module as soon as the respective temperature ranges are
54configuration register of the chip; bit 6 (0x40 or 64) is the high alarm 54left.
55bit and bit 5 (0x20 or 32) the low one. These bits are set when the high or
56low limits are met or exceeded and are reset by the module as soon as the
57respective temperature ranges are left.
58 55
59The alarm registers are in no way suitable to find out about the actual 56The alarm registers are in no way suitable to find out about the actual
60status of Tout. They will only tell you about its history, whether or not 57status of Tout. They will only tell you about its history, whether or not
@@ -64,45 +61,3 @@ with neither of the alarms set.
64 61
65Temperature conversion of the DS1621 takes up to 1000ms; internal access to 62Temperature conversion of the DS1621 takes up to 1000ms; internal access to
66non-volatile registers may last for 10ms or below. 63non-volatile registers may last for 10ms or below.
67
68High Accuracy Temperature Reading
69---------------------------------
70
71As said before, the temperature issued via the 9-bit i2c-bus data is
72somewhat arbitrary. Internally, the temperature conversion is of a
73different kind that is explained (not so...) well in the DS1621 data sheet.
74To cut the long story short: Inside the DS1621 there are two oscillators,
75both of them biassed by a temperature coefficient.
76
77Higher resolution of the temperature reading can be achieved using the
78internal projection, which means taking account of REG_COUNT and REG_SLOPE
79(the driver manages them):
80
81Taken from Dallas Semiconductors App Note 068: 'Increasing Temperature
82Resolution on the DS1620' and App Note 105: 'High Resolution Temperature
83Measurement with Dallas Direct-to-Digital Temperature Sensors'
84
85- Read the 9-bit temperature and strip the LSB (Truncate the .5 degs)
86- The resulting value is TEMP_READ.
87- Then, read REG_COUNT.
88- And then, REG_SLOPE.
89
90 TEMP = TEMP_READ - 0.25 + ((REG_SLOPE - REG_COUNT) / REG_SLOPE)
91
92Note that this is what the DONE bit in the DS1621 configuration register is
93good for: Internally, one temperature conversion takes up to 1000ms. Before
94that conversion is complete you will not be able to read valid things out
95of REG_COUNT and REG_SLOPE. The DONE bit, as you may have guessed by now,
96tells you whether the conversion is complete ("done", in plain English) and
97thus, whether the values you read are good or not.
98
99The DS1621 has two modes of operation: "Continuous" conversion, which can
100be understood as the default stand-alone mode where the chip gets the
101temperature and controls external devices via its Tout pin or tells other
102i2c's about it if they care. The other mode is called "1SHOT", that means
103that it only figures out about the temperature when it is explicitly told
104to do so; this can be seen as power saving mode.
105
106Now if you want to read REG_COUNT and REG_SLOPE, you have to either stop
107the continuous conversions until the contents of these registers are valid,
108or, in 1SHOT mode, you have to have one conversion made.
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index a8321267b5b6..bee4c30bc1e2 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -2,14 +2,18 @@ Kernel driver f71882fg
2====================== 2======================
3 3
4Supported chips: 4Supported chips:
5 * Fintek F71882FG and F71883FG 5 * Fintek F71858FG
6 Prefix: 'f71882fg' 6 Prefix: 'f71858fg'
7 Addresses scanned: none, address read from Super I/O config space 7 Addresses scanned: none, address read from Super I/O config space
8 Datasheet: Available from the Fintek website 8 Datasheet: Available from the Fintek website
9 * Fintek F71862FG and F71863FG 9 * Fintek F71862FG and F71863FG
10 Prefix: 'f71862fg' 10 Prefix: 'f71862fg'
11 Addresses scanned: none, address read from Super I/O config space 11 Addresses scanned: none, address read from Super I/O config space
12 Datasheet: Available from the Fintek website 12 Datasheet: Available from the Fintek website
13 * Fintek F71882FG and F71883FG
14 Prefix: 'f71882fg'
15 Addresses scanned: none, address read from Super I/O config space
16 Datasheet: Available from the Fintek website
13 * Fintek F8000 17 * Fintek F8000
14 Prefix: 'f8000' 18 Prefix: 'f8000'
15 Addresses scanned: none, address read from Super I/O config space 19 Addresses scanned: none, address read from Super I/O config space
@@ -66,13 +70,13 @@ printed when loading the driver.
66 70
67Three different fan control modes are supported; the mode number is written 71Three different fan control modes are supported; the mode number is written
68to the pwm#_enable file. Note that not all modes are supported on all 72to the pwm#_enable file. Note that not all modes are supported on all
69chips, and some modes may only be available in RPM / PWM mode on the F8000. 73chips, and some modes may only be available in RPM / PWM mode.
70Writing an unsupported mode will result in an invalid parameter error. 74Writing an unsupported mode will result in an invalid parameter error.
71 75
72* 1: Manual mode 76* 1: Manual mode
73 You ask for a specific PWM duty cycle / DC voltage or a specific % of 77 You ask for a specific PWM duty cycle / DC voltage or a specific % of
74 fan#_full_speed by writing to the pwm# file. This mode is only 78 fan#_full_speed by writing to the pwm# file. This mode is only
75 available on the F8000 if the fan channel is in RPM mode. 79 available on the F71858FG / F8000 if the fan channel is in RPM mode.
76 80
77* 2: Normal auto mode 81* 2: Normal auto mode
78 You can define a number of temperature/fan speed trip points, which % the 82 You can define a number of temperature/fan speed trip points, which % the
diff --git a/Documentation/hwmon/g760a b/Documentation/hwmon/g760a
new file mode 100644
index 000000000000..e032eeb75629
--- /dev/null
+++ b/Documentation/hwmon/g760a
@@ -0,0 +1,36 @@
1Kernel driver g760a
2===================
3
4Supported chips:
5 * Global Mixed-mode Technology Inc. G760A
6 Prefix: 'g760a'
7 Datasheet: Publicly available at the GMT website
8 http://www.gmt.com.tw/datasheet/g760a.pdf
9
10Author: Herbert Valerio Riedel <hvr@gnu.org>
11
12Description
13-----------
14
15The GMT G760A Fan Speed PWM Controller is connected directly to a fan
16and performs closed-loop control of the fan speed.
17
18The fan speed is programmed by setting the period via 'pwm1' of two
19consecutive speed pulses. The period is defined in terms of clock
20cycle counts of an assumed 32kHz clock source.
21
22Setting a period of 0 stops the fan; setting the period to 255 sets
23fan to maximum speed.
24
25The measured fan rotation speed returned via 'fan1_input' is derived
26from the measured speed pulse period by assuming again a 32kHz clock
27source and a 2 pulse-per-revolution fan.
28
29The 'alarms' file provides access to the two alarm bits provided by
30the G760A chip's status register: Bit 0 is set when the actual fan
31speed differs more than 20% with respect to the programmed fan speed;
32bit 1 is set when fan speed is below 1920 RPM.
33
34The g760a driver will not update its values more frequently than every
35other second; reading them more often will do no harm, but will return
36'old' values.
diff --git a/Documentation/hwmon/ibmaem b/Documentation/hwmon/ibmaem
index e98bdfea3467..1e0d59e000b4 100644
--- a/Documentation/hwmon/ibmaem
+++ b/Documentation/hwmon/ibmaem
@@ -7,7 +7,7 @@ henceforth as AEM.
7Supported systems: 7Supported systems:
8 * Any recent IBM System X server with AEM support. 8 * Any recent IBM System X server with AEM support.
9 This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2, 9 This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2,
10 x3950 M2, and certain HS2x/LS2x/QS2x blades. The IPMI host interface 10 x3950 M2, and certain HC10/HS2x/LS2x/QS2x blades. The IPMI host interface
11 driver ("ipmi-si") needs to be loaded for this driver to do anything. 11 driver ("ipmi-si") needs to be loaded for this driver to do anything.
12 Prefix: 'ibmaem' 12 Prefix: 'ibmaem'
13 Datasheet: Not available 13 Datasheet: Not available
diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d
index 287f8c902656..effe949a7282 100644
--- a/Documentation/hwmon/lis3lv02d
+++ b/Documentation/hwmon/lis3lv02d
@@ -1,11 +1,11 @@
1Kernel driver lis3lv02d 1Kernel driver lis3lv02d
2================== 2=======================
3 3
4Supported chips: 4Supported chips:
5 5
6 * STMicroelectronics LIS3LV02DL and LIS3LV02DQ 6 * STMicroelectronics LIS3LV02DL and LIS3LV02DQ
7 7
8Author: 8Authors:
9 Yan Burman <burman.yan@gmail.com> 9 Yan Burman <burman.yan@gmail.com>
10 Eric Piel <eric.piel@tremplin-utc.net> 10 Eric Piel <eric.piel@tremplin-utc.net>
11 11
@@ -15,7 +15,7 @@ Description
15 15
16This driver provides support for the accelerometer found in various HP 16This driver provides support for the accelerometer found in various HP
17laptops sporting the feature officially called "HP Mobile Data 17laptops sporting the feature officially called "HP Mobile Data
18Protection System 3D" or "HP 3D DriveGuard". It detect automatically 18Protection System 3D" or "HP 3D DriveGuard". It detects automatically
19laptops with this sensor. Known models (for now the HP 2133, nc6420, 19laptops with this sensor. Known models (for now the HP 2133, nc6420,
20nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis 20nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis
21automatically oriented on standard way (eg: you can directly play 21automatically oriented on standard way (eg: you can directly play
@@ -27,7 +27,7 @@ position - 3D position that the accelerometer reports. Format: "(x,y,z)"
27calibrate - read: values (x, y, z) that are used as the base for input 27calibrate - read: values (x, y, z) that are used as the base for input
28 class device operation. 28 class device operation.
29 write: forces the base to be recalibrated with the current 29 write: forces the base to be recalibrated with the current
30 position. 30 position.
31rate - reports the sampling rate of the accelerometer device in HZ 31rate - reports the sampling rate of the accelerometer device in HZ
32 32
33This driver also provides an absolute input class device, allowing 33This driver also provides an absolute input class device, allowing
@@ -48,7 +48,7 @@ For better compatibility between the various laptops. The values reported by
48the accelerometer are converted into a "standard" organisation of the axes 48the accelerometer are converted into a "standard" organisation of the axes
49(aka "can play neverball out of the box"): 49(aka "can play neverball out of the box"):
50 * When the laptop is horizontal the position reported is about 0 for X and Y 50 * When the laptop is horizontal the position reported is about 0 for X and Y
51and a positive value for Z 51 and a positive value for Z
52 * If the left side is elevated, X increases (becomes positive) 52 * If the left side is elevated, X increases (becomes positive)
53 * If the front side (where the touchpad is) is elevated, Y decreases 53 * If the front side (where the touchpad is) is elevated, Y decreases
54 (becomes negative) 54 (becomes negative)
@@ -59,3 +59,13 @@ email to the authors to add it to the database. When reporting a new
59laptop, please include the output of "dmidecode" plus the value of 59laptop, please include the output of "dmidecode" plus the value of
60/sys/devices/platform/lis3lv02d/position in these four cases. 60/sys/devices/platform/lis3lv02d/position in these four cases.
61 61
62Q&A
63---
64
65Q: How do I safely simulate freefall? I have an HP "portable
66workstation" which has about 3.5kg and a plastic case, so letting it
67fall to the ground is out of question...
68
69A: The sensor is pretty sensitive, so your hands can do it. Lift it
70into free space, follow the fall with your hands for like 10
71centimeters. That should be enough to trigger the detection.
diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215
new file mode 100644
index 000000000000..2e6a21eb656c
--- /dev/null
+++ b/Documentation/hwmon/ltc4215
@@ -0,0 +1,50 @@
1Kernel driver ltc4215
2=====================
3
4Supported chips:
5 * Linear Technology LTC4215
6 Prefix: 'ltc4215'
7 Addresses scanned: 0x44
8 Datasheet:
9 http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697
10
11Author: Ira W. Snyder <iws@ovro.caltech.edu>
12
13
14Description
15-----------
16
17The LTC4215 controller allows a board to be safely inserted and removed
18from a live backplane.
19
20
21Usage Notes
22-----------
23
24This driver does not probe for LTC4215 devices, due to the fact that some
25of the possible addresses are unfriendly to probing. You will need to use
26the "force" parameter to tell the driver where to find the device.
27
28Example: the following will load the driver for an LTC4215 at address 0x44
29on I2C bus #0:
30$ modprobe ltc4215 force=0,0x44
31
32
33Sysfs entries
34-------------
35
36The LTC4215 has built-in limits for overvoltage, undervoltage, and
37undercurrent warnings. This makes it very likely that the reference
38circuit will be used.
39
40in1_input input voltage
41in2_input output voltage
42
43in1_min_alarm input undervoltage alarm
44in1_max_alarm input overvoltage alarm
45
46curr1_input current
47curr1_max_alarm overcurrent alarm
48
49power1_input power usage
50power1_alarm power bad alarm
diff --git a/Documentation/i2c/chips/pcf8591 b/Documentation/hwmon/pcf8591
index 5628fcf4207f..5628fcf4207f 100644
--- a/Documentation/i2c/chips/pcf8591
+++ b/Documentation/hwmon/pcf8591
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 6dbfd5efd991..dcbd502c8792 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -70,6 +70,7 @@ are interpreted as 0! For more on how written strings are interpreted see the
70[0-*] denotes any positive number starting from 0 70[0-*] denotes any positive number starting from 0
71[1-*] denotes any positive number starting from 1 71[1-*] denotes any positive number starting from 1
72RO read only value 72RO read only value
73WO write only value
73RW read/write value 74RW read/write value
74 75
75Read/write values may be read-only for some chips, depending on the 76Read/write values may be read-only for some chips, depending on the
@@ -150,6 +151,11 @@ fan[1-*]_min Fan minimum value
150 Unit: revolution/min (RPM) 151 Unit: revolution/min (RPM)
151 RW 152 RW
152 153
154fan[1-*]_max Fan maximum value
155 Unit: revolution/min (RPM)
156 Only rarely supported by the hardware.
157 RW
158
153fan[1-*]_input Fan input value. 159fan[1-*]_input Fan input value.
154 Unit: revolution/min (RPM) 160 Unit: revolution/min (RPM)
155 RO 161 RO
@@ -290,6 +296,24 @@ temp[1-*]_label Suggested temperature channel label.
290 user-space. 296 user-space.
291 RO 297 RO
292 298
299temp[1-*]_lowest
300 Historical minimum temperature
301 Unit: millidegree Celsius
302 RO
303
304temp[1-*]_highest
305 Historical maximum temperature
306 Unit: millidegree Celsius
307 RO
308
309temp[1-*]_reset_history
310 Reset temp_lowest and temp_highest
311 WO
312
313temp_reset_history
314 Reset temp_lowest and temp_highest for all sensors
315 WO
316
293Some chips measure temperature using external thermistors and an ADC, and 317Some chips measure temperature using external thermistors and an ADC, and
294report the temperature measurement as a voltage. Converting this voltage 318report the temperature measurement as a voltage. Converting this voltage
295back to a temperature (or the other way around for limits) requires 319back to a temperature (or the other way around for limits) requires
@@ -365,6 +389,7 @@ energy[1-*]_input Cumulative energy use
365 Unit: microJoule 389 Unit: microJoule
366 RO 390 RO
367 391
392
368********** 393**********
369* Alarms * 394* Alarms *
370********** 395**********
@@ -389,6 +414,7 @@ OR
389in[0-*]_min_alarm 414in[0-*]_min_alarm
390in[0-*]_max_alarm 415in[0-*]_max_alarm
391fan[1-*]_min_alarm 416fan[1-*]_min_alarm
417fan[1-*]_max_alarm
392temp[1-*]_min_alarm 418temp[1-*]_min_alarm
393temp[1-*]_max_alarm 419temp[1-*]_max_alarm
394temp[1-*]_crit_alarm 420temp[1-*]_crit_alarm
@@ -453,6 +479,27 @@ beep_mask Bitmask for beep.
453 RW 479 RW
454 480
455 481
482***********************
483* Intrusion detection *
484***********************
485
486intrusion[0-*]_alarm
487 Chassis intrusion detection
488 0: OK
489 1: intrusion detected
490 RW
491 Contrary to regular alarm flags which clear themselves
492 automatically when read, this one sticks until cleared by
493 the user. This is done by writing 0 to the file. Writing
494 other values is unsupported.
495
496intrusion[0-*]_beep
497 Chassis intrusion beep
498 0: disable
499 1: enable
500 RW
501
502
456sysfs attribute writes interpretation 503sysfs attribute writes interpretation
457------------------------------------- 504-------------------------------------
458 505
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401
new file mode 100644
index 000000000000..9fc447249212
--- /dev/null
+++ b/Documentation/hwmon/tmp401
@@ -0,0 +1,42 @@
1Kernel driver tmp401
2====================
3
4Supported chips:
5 * Texas Instruments TMP401
6 Prefix: 'tmp401'
7 Addresses scanned: I2C 0x4c
8 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html
9 * Texas Instruments TMP411
10 Prefix: 'tmp411'
11 Addresses scanned: I2C 0x4c
12 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html
13
14Authors:
15 Hans de Goede <hdegoede@redhat.com>
16 Andre Prendel <andre.prendel@gmx.de>
17
18Description
19-----------
20
21This driver implements support for Texas Instruments TMP401 and
22TMP411 chips. These chips implements one remote and one local
23temperature sensor. Temperature is measured in degrees
24Celsius. Resolution of the remote sensor is 0.0625 degree. Local
25sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not
26supported by the driver so far, so using the default resolution of 0.5
27degree).
28
29The driver provides the common sysfs-interface for temperatures (see
30/Documentation/hwmon/sysfs-interface under Temperatures).
31
32The TMP411 chip is compatible with TMP401. It provides some additional
33features.
34
35* Minimum and Maximum temperature measured since power-on, chip-reset
36
37 Exported via sysfs attributes tempX_lowest and tempX_highest.
38
39* Reset of historical minimum/maximum temperature measurements
40
41 Exported via sysfs attribute temp_reset_history. Writing 1 to this
42 file triggers a reset.
diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf
index d6e1ae30fa6e..02b74899edaf 100644
--- a/Documentation/hwmon/w83627ehf
+++ b/Documentation/hwmon/w83627ehf
@@ -2,30 +2,44 @@ Kernel driver w83627ehf
2======================= 2=======================
3 3
4Supported chips: 4Supported chips:
5 * Winbond W83627EHF/EHG/DHG (ISA access ONLY) 5 * Winbond W83627EHF/EHG (ISA access ONLY)
6 Prefix: 'w83627ehf' 6 Prefix: 'w83627ehf'
7 Addresses scanned: ISA address retrieved from Super I/O registers 7 Addresses scanned: ISA address retrieved from Super I/O registers
8 Datasheet: 8 Datasheet:
9 http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83627EHF_%20W83627EHGb.pdf 9 http://www.nuvoton.com.tw/NR/rdonlyres/A6A258F0-F0C9-4F97-81C0-C4D29E7E943E/0/W83627EHF.pdf
10 DHG datasheet confidential. 10 * Winbond W83627DHG
11 Prefix: 'w83627dhg'
12 Addresses scanned: ISA address retrieved from Super I/O registers
13 Datasheet:
14 http://www.nuvoton.com.tw/NR/rdonlyres/7885623D-A487-4CF9-A47F-30C5F73D6FE6/0/W83627DHG.pdf
15 * Winbond W83627DHG-P
16 Prefix: 'w83627dhg'
17 Addresses scanned: ISA address retrieved from Super I/O registers
18 Datasheet: not available
19 * Winbond W83667HG
20 Prefix: 'w83667hg'
21 Addresses scanned: ISA address retrieved from Super I/O registers
22 Datasheet: not available
11 23
12Authors: 24Authors:
13 Jean Delvare <khali@linux-fr.org> 25 Jean Delvare <khali@linux-fr.org>
14 Yuan Mu (Winbond) 26 Yuan Mu (Winbond)
15 Rudolf Marek <r.marek@assembler.cz> 27 Rudolf Marek <r.marek@assembler.cz>
16 David Hubbard <david.c.hubbard@gmail.com> 28 David Hubbard <david.c.hubbard@gmail.com>
29 Gong Jun <JGong@nuvoton.com>
17 30
18Description 31Description
19----------- 32-----------
20 33
21This driver implements support for the Winbond W83627EHF, W83627EHG, and 34This driver implements support for the Winbond W83627EHF, W83627EHG,
22W83627DHG super I/O chips. We will refer to them collectively as Winbond chips. 35W83627DHG, W83627DHG-P and W83667HG super I/O chips. We will refer to them
36collectively as Winbond chips.
23 37
24The chips implement three temperature sensors, five fan rotation 38The chips implement three temperature sensors, five fan rotation
25speed sensors, ten analog voltage sensors (only nine for the 627DHG), one 39speed sensors, ten analog voltage sensors (only nine for the 627DHG), one
26VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG), alarms with beep 40VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG and 667HG), alarms
27warnings (control unimplemented), and some automatic fan regulation 41with beep warnings (control unimplemented), and some automatic fan
28strategies (plus manual fan control mode). 42regulation strategies (plus manual fan control mode).
29 43
30Temperatures are measured in degrees Celsius and measurement resolution is 1 44Temperatures are measured in degrees Celsius and measurement resolution is 1
31degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when 45degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when
@@ -54,7 +68,8 @@ follows:
54temp1 -> pwm1 68temp1 -> pwm1
55temp2 -> pwm2 69temp2 -> pwm2
56temp3 -> pwm3 70temp3 -> pwm3
57prog -> pwm4 (the programmable setting is not supported by the driver) 71prog -> pwm4 (not on 667HG; the programmable setting is not supported by
72 the driver)
58 73
59/sys files 74/sys files
60---------- 75----------
@@ -124,3 +139,6 @@ done in the driver for all register addresses.
124The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and 139The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and
125the ICH8 southbridge gets that data via PECI from the DHG, so that the 140the ICH8 southbridge gets that data via PECI from the DHG, so that the
126southbridge drives the fans. And the DHG supports SST, a one-wire serial bus. 141southbridge drives the fans. And the DHG supports SST, a one-wire serial bus.
142
143The DHG-P has an additional automatic fan speed control mode named Smart Fan
144(TM) III+. This mode is not yet supported by the driver.
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
index fae3495bcbaf..9698c396b830 100644
--- a/Documentation/i2c/busses/i2c-nforce2
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -7,10 +7,14 @@ Supported adapters:
7 * nForce3 250Gb MCP 10de:00E4 7 * nForce3 250Gb MCP 10de:00E4
8 * nForce4 MCP 10de:0052 8 * nForce4 MCP 10de:0052
9 * nForce4 MCP-04 10de:0034 9 * nForce4 MCP-04 10de:0034
10 * nForce4 MCP51 10de:0264 10 * nForce MCP51 10de:0264
11 * nForce4 MCP55 10de:0368 11 * nForce MCP55 10de:0368
12 * nForce4 MCP61 10de:03EB 12 * nForce MCP61 10de:03EB
13 * nForce4 MCP65 10de:0446 13 * nForce MCP65 10de:0446
14 * nForce MCP67 10de:0542
15 * nForce MCP73 10de:07D8
16 * nForce MCP78S 10de:0752
17 * nForce MCP79 10de:0AA2
14 18
15Datasheet: not publicly available, but seems to be similar to the 19Datasheet: not publicly available, but seems to be similar to the
16 AMD-8111 SMBus 2.0 adapter. 20 AMD-8111 SMBus 2.0 adapter.
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index cfcebb10d14e..c269aaa2f26a 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -20,6 +20,8 @@ platform_device with the base address and interrupt number. The
20dev.platform_data of the device should also point to a struct 20dev.platform_data of the device should also point to a struct
21ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the 21ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
22distance between registers and the input clock speed. 22distance between registers and the input clock speed.
23There is also a possibility to attach a list of i2c_board_info which
24the i2c-ocores driver will add to the bus upon creation.
23 25
24E.G. something like: 26E.G. something like:
25 27
@@ -36,9 +38,24 @@ static struct resource ocores_resources[] = {
36 }, 38 },
37}; 39};
38 40
41/* optional board info */
42struct i2c_board_info ocores_i2c_board_info[] = {
43 {
44 I2C_BOARD_INFO("tsc2003", 0x48),
45 .platform_data = &tsc2003_platform_data,
46 .irq = TSC_IRQ
47 },
48 {
49 I2C_BOARD_INFO("adv7180", 0x42 >> 1),
50 .irq = ADV_IRQ
51 }
52};
53
39static struct ocores_i2c_platform_data myi2c_data = { 54static struct ocores_i2c_platform_data myi2c_data = {
40 .regstep = 2, /* two bytes between registers */ 55 .regstep = 2, /* two bytes between registers */
41 .clock_khz = 50000, /* input clock of 50MHz */ 56 .clock_khz = 50000, /* input clock of 50MHz */
57 .devices = ocores_i2c_board_info, /* optional table of devices */
58 .num_devices = ARRAY_SIZE(ocores_i2c_board_info), /* table size */
42}; 59};
43 60
44static struct platform_device myi2c = { 61static struct platform_device myi2c = {
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index ef1efa79b1df..f889481762b5 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -4,7 +4,7 @@ Supported adapters:
4 * Intel 82371AB PIIX4 and PIIX4E 4 * Intel 82371AB PIIX4 and PIIX4E
5 * Intel 82443MX (440MX) 5 * Intel 82443MX (440MX)
6 Datasheet: Publicly available at the Intel website 6 Datasheet: Publicly available at the Intel website
7 * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges 7 * ServerWorks OSB4, CSB5, CSB6, HT-1000 and HT-1100 southbridges
8 Datasheet: Only available via NDA from ServerWorks 8 Datasheet: Only available via NDA from ServerWorks
9 * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges 9 * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
10 Datasheet: Not publicly available 10 Datasheet: Not publicly available
diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro
index 22efedf60c87..2e758b0e9456 100644
--- a/Documentation/i2c/busses/i2c-viapro
+++ b/Documentation/i2c/busses/i2c-viapro
@@ -19,6 +19,9 @@ Supported adapters:
19 * VIA Technologies, Inc. VX800/VX820 19 * VIA Technologies, Inc. VX800/VX820
20 Datasheet: available on http://linux.via.com.tw 20 Datasheet: available on http://linux.via.com.tw
21 21
22 * VIA Technologies, Inc. VX855/VX875
23 Datasheet: Availability unknown
24
22Authors: 25Authors:
23 Kyösti Mälkki <kmalkki@cc.hut.fi>, 26 Kyösti Mälkki <kmalkki@cc.hut.fi>,
24 Mark D. Studebaker <mdsxyz123@yahoo.com>, 27 Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -53,6 +56,7 @@ Your lspci -n listing must show one of these :
53 device 1106:3287 (VT8251) 56 device 1106:3287 (VT8251)
54 device 1106:8324 (CX700) 57 device 1106:8324 (CX700)
55 device 1106:8353 (VX800/VX820) 58 device 1106:8353 (VX800/VX820)
59 device 1106:8409 (VX855/VX875)
56 60
57If none of these show up, you should look in the BIOS for settings like 61If none of these show up, you should look in the BIOS for settings like
58enable ACPI / SMBus or even USB. 62enable ACPI / SMBus or even USB.
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
new file mode 100644
index 000000000000..c740b7b41088
--- /dev/null
+++ b/Documentation/i2c/instantiating-devices
@@ -0,0 +1,211 @@
1How to instantiate I2C devices
2==============================
3
4Unlike PCI or USB devices, I2C devices are not enumerated at the hardware
5level. Instead, the software must know which devices are connected on each
6I2C bus segment, and what address these devices are using. For this
7reason, the kernel code must instantiate I2C devices explicitly. There are
8several ways to achieve this, depending on the context and requirements.
9
10
11Method 1: Declare the I2C devices by bus number
12-----------------------------------------------
13
14This method is appropriate when the I2C bus is a system bus as is the case
15for many embedded systems. On such systems, each I2C bus has a number
16which is known in advance. It is thus possible to pre-declare the I2C
17devices which live on this bus. This is done with an array of struct
18i2c_board_info which is registered by calling i2c_register_board_info().
19
20Example (from omap2 h4):
21
22static struct i2c_board_info __initdata h4_i2c_board_info[] = {
23 {
24 I2C_BOARD_INFO("isp1301_omap", 0x2d),
25 .irq = OMAP_GPIO_IRQ(125),
26 },
27 { /* EEPROM on mainboard */
28 I2C_BOARD_INFO("24c01", 0x52),
29 .platform_data = &m24c01,
30 },
31 { /* EEPROM on cpu card */
32 I2C_BOARD_INFO("24c01", 0x57),
33 .platform_data = &m24c01,
34 },
35};
36
37static void __init omap_h4_init(void)
38{
39 (...)
40 i2c_register_board_info(1, h4_i2c_board_info,
41 ARRAY_SIZE(h4_i2c_board_info));
42 (...)
43}
44
45The above code declares 3 devices on I2C bus 1, including their respective
46addresses and custom data needed by their drivers. When the I2C bus in
47question is registered, the I2C devices will be instantiated automatically
48by i2c-core.
49
50The devices will be automatically unbound and destroyed when the I2C bus
51they sit on goes away (if ever.)
52
53
54Method 2: Instantiate the devices explicitly
55--------------------------------------------
56
57This method is appropriate when a larger device uses an I2C bus for
58internal communication. A typical case is TV adapters. These can have a
59tuner, a video decoder, an audio decoder, etc. usually connected to the
60main chip by the means of an I2C bus. You won't know the number of the I2C
61bus in advance, so the method 1 described above can't be used. Instead,
62you can instantiate your I2C devices explicitly. This is done by filling
63a struct i2c_board_info and calling i2c_new_device().
64
65Example (from the sfe4001 network driver):
66
67static struct i2c_board_info sfe4001_hwmon_info = {
68 I2C_BOARD_INFO("max6647", 0x4e),
69};
70
71int sfe4001_init(struct efx_nic *efx)
72{
73 (...)
74 efx->board_info.hwmon_client =
75 i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info);
76
77 (...)
78}
79
80The above code instantiates 1 I2C device on the I2C bus which is on the
81network adapter in question.
82
83A variant of this is when you don't know for sure if an I2C device is
84present or not (for example for an optional feature which is not present
85on cheap variants of a board but you have no way to tell them apart), or
86it may have different addresses from one board to the next (manufacturer
87changing its design without notice). In this case, you can call
88i2c_new_probed_device() instead of i2c_new_device().
89
90Example (from the pnx4008 OHCI driver):
91
92static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
93
94static int __devinit usb_hcd_pnx4008_probe(struct platform_device *pdev)
95{
96 (...)
97 struct i2c_adapter *i2c_adap;
98 struct i2c_board_info i2c_info;
99
100 (...)
101 i2c_adap = i2c_get_adapter(2);
102 memset(&i2c_info, 0, sizeof(struct i2c_board_info));
103 strlcpy(i2c_info.name, "isp1301_pnx", I2C_NAME_SIZE);
104 isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
105 normal_i2c);
106 i2c_put_adapter(i2c_adap);
107 (...)
108}
109
110The above code instantiates up to 1 I2C device on the I2C bus which is on
111the OHCI adapter in question. It first tries at address 0x2c, if nothing
112is found there it tries address 0x2d, and if still nothing is found, it
113simply gives up.
114
115The driver which instantiated the I2C device is responsible for destroying
116it on cleanup. This is done by calling i2c_unregister_device() on the
117pointer that was earlier returned by i2c_new_device() or
118i2c_new_probed_device().
119
120
121Method 3: Probe an I2C bus for certain devices
122----------------------------------------------
123
124Sometimes you do not have enough information about an I2C device, not even
125to call i2c_new_probed_device(). The typical case is hardware monitoring
126chips on PC mainboards. There are several dozen models, which can live
127at 25 different addresses. Given the huge number of mainboards out there,
128it is next to impossible to build an exhaustive list of the hardware
129monitoring chips being used. Fortunately, most of these chips have
130manufacturer and device ID registers, so they can be identified by
131probing.
132
133In that case, I2C devices are neither declared nor instantiated
134explicitly. Instead, i2c-core will probe for such devices as soon as their
135drivers are loaded, and if any is found, an I2C device will be
136instantiated automatically. In order to prevent any misbehavior of this
137mechanism, the following restrictions apply:
138* The I2C device driver must implement the detect() method, which
139 identifies a supported device by reading from arbitrary registers.
140* Only buses which are likely to have a supported device and agree to be
141 probed, will be probed. For example this avoids probing for hardware
142 monitoring chips on a TV adapter.
143
144Example:
145See lm90_driver and lm90_detect() in drivers/hwmon/lm90.c
146
147I2C devices instantiated as a result of such a successful probe will be
148destroyed automatically when the driver which detected them is removed,
149or when the underlying I2C bus is itself destroyed, whichever happens
150first.
151
152Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6
153kernels will find out that this method 3 is essentially similar to what
154was done there. Two significant differences are:
155* Probing is only one way to instantiate I2C devices now, while it was the
156 only way back then. Where possible, methods 1 and 2 should be preferred.
157 Method 3 should only be used when there is no other way, as it can have
158 undesirable side effects.
159* I2C buses must now explicitly say which I2C driver classes can probe
160 them (by the means of the class bitfield), while all I2C buses were
161 probed by default back then. The default is an empty class which means
162 that no probing happens. The purpose of the class bitfield is to limit
163 the aforementioned undesirable side effects.
164
165Once again, method 3 should be avoided wherever possible. Explicit device
166instantiation (methods 1 and 2) is much preferred for it is safer and
167faster.
168
169
170Method 4: Instantiate from user-space
171-------------------------------------
172
173In general, the kernel should know which I2C devices are connected and
174what addresses they live at. However, in certain cases, it does not, so a
175sysfs interface was added to let the user provide the information. This
176interface is made of 2 attribute files which are created in every I2C bus
177directory: new_device and delete_device. Both files are write only and you
178must write the right parameters to them in order to properly instantiate,
179respectively delete, an I2C device.
180
181File new_device takes 2 parameters: the name of the I2C device (a string)
182and the address of the I2C device (a number, typically expressed in
183hexadecimal starting with 0x, but can also be expressed in decimal.)
184
185File delete_device takes a single parameter: the address of the I2C
186device. As no two devices can live at the same address on a given I2C
187segment, the address is sufficient to uniquely identify the device to be
188deleted.
189
190Example:
191# echo eeprom 0x50 > /sys/class/i2c-adapter/i2c-3/new_device
192
193While this interface should only be used when in-kernel device declaration
194can't be done, there is a variety of cases where it can be helpful:
195* The I2C driver usually detects devices (method 3 above) but the bus
196 segment your device lives on doesn't have the proper class bit set and
197 thus detection doesn't trigger.
198* The I2C driver usually detects devices, but your device lives at an
199 unexpected address.
200* The I2C driver usually detects devices, but your device is not detected,
201 either because the detection routine is too strict, or because your
202 device is not officially supported yet but you know it is compatible.
203* You are developing a driver on a test board, where you soldered the I2C
204 device yourself.
205
206This interface is a replacement for the force_* module parameters some I2C
207drivers implement. Being implemented in i2c-core rather than in each
208device driver individually, it is much more efficient, and also has the
209advantage that you do not have to reload the driver to change a setting.
210You can also instantiate the device before the driver is loaded or even
211available, and you don't need to know what driver the device needs.
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index 6b9af7d479c2..7860aafb483d 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -126,19 +126,9 @@ different) configuration information, as do drivers handling chip variants
126that can't be distinguished by protocol probing, or which need some board 126that can't be distinguished by protocol probing, or which need some board
127specific information to operate correctly. 127specific information to operate correctly.
128 128
129Accordingly, the I2C stack now has two models for associating I2C devices
130with their drivers: the original "legacy" model, and a newer one that's
131fully compatible with the Linux 2.6 driver model. These models do not mix,
132since the "legacy" model requires drivers to create "i2c_client" device
133objects after SMBus style probing, while the Linux driver model expects
134drivers to be given such device objects in their probe() routines.
135 129
136The legacy model is deprecated now and will soon be removed, so we no 130Device/Driver Binding
137longer document it here. 131---------------------
138
139
140Standard Driver Model Binding ("New Style")
141-------------------------------------------
142 132
143System infrastructure, typically board-specific initialization code or 133System infrastructure, typically board-specific initialization code or
144boot firmware, reports what I2C devices exist. For example, there may be 134boot firmware, reports what I2C devices exist. For example, there may be
@@ -201,21 +191,32 @@ a given I2C bus. This is for example the case of hardware monitoring
201devices on a PC's SMBus. In that case, you may want to let your driver 191devices on a PC's SMBus. In that case, you may want to let your driver
202detect supported devices automatically. This is how the legacy model 192detect supported devices automatically. This is how the legacy model
203was working, and is now available as an extension to the standard 193was working, and is now available as an extension to the standard
204driver model (so that we can finally get rid of the legacy model.) 194driver model.
205 195
206You simply have to define a detect callback which will attempt to 196You simply have to define a detect callback which will attempt to
207identify supported devices (returning 0 for supported ones and -ENODEV 197identify supported devices (returning 0 for supported ones and -ENODEV
208for unsupported ones), a list of addresses to probe, and a device type 198for unsupported ones), a list of addresses to probe, and a device type
209(or class) so that only I2C buses which may have that type of device 199(or class) so that only I2C buses which may have that type of device
210connected (and not otherwise enumerated) will be probed. The i2c 200connected (and not otherwise enumerated) will be probed. For example,
211core will then call you back as needed and will instantiate a device 201a driver for a hardware monitoring chip for which auto-detection is
212for you for every successful detection. 202needed would set its class to I2C_CLASS_HWMON, and only I2C adapters
203with a class including I2C_CLASS_HWMON would be probed by this driver.
204Note that the absence of matching classes does not prevent the use of
205a device of that type on the given I2C adapter. All it prevents is
206auto-detection; explicit instantiation of devices is still possible.
213 207
214Note that this mechanism is purely optional and not suitable for all 208Note that this mechanism is purely optional and not suitable for all
215devices. You need some reliable way to identify the supported devices 209devices. You need some reliable way to identify the supported devices
216(typically using device-specific, dedicated identification registers), 210(typically using device-specific, dedicated identification registers),
217otherwise misdetections are likely to occur and things can get wrong 211otherwise misdetections are likely to occur and things can get wrong
218quickly. 212quickly. Keep in mind that the I2C protocol doesn't include any
213standard way to detect the presence of a chip at a given address, let
214alone a standard way to identify devices. Even worse is the lack of
215semantics associated to bus transfers, which means that the same
216transfer can be seen as a read operation by a chip and as a write
217operation by another chip. For these reasons, explicit device
218instantiation should always be preferred to auto-detection where
219possible.
219 220
220 221
221Device Deletion 222Device Deletion
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
index 84f7cb3d5bec..ffb5c80bec3e 100644
--- a/Documentation/ia64/kvm.txt
+++ b/Documentation/ia64/kvm.txt
@@ -42,7 +42,7 @@ Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qe
42 hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg 42 hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
43 you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. 43 you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
44 44
45 (3) Rename the firware you owned to Flash.fd, and copy it to /usr/local/share/qemu 45 (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
46 46
474. Boot up Linux or Windows guests: 474. Boot up Linux or Windows guests:
48 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. 48 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 0c78f4b1d9d9..e77bebfa7b0d 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -216,6 +216,8 @@ Other kernel parameters for ide_core are:
216 216
217* "noflush=[interface_number.device_number]" to disable flush requests 217* "noflush=[interface_number.device_number]" to disable flush requests
218 218
219* "nohpa=[interface_number.device_number]" to disable Host Protected Area
220
219* "noprobe=[interface_number.device_number]" to skip probing 221* "noprobe=[interface_number.device_number]" to skip probing
220 222
221* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit 223* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 864ff3283780..6d40f00b358c 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -24,6 +24,49 @@ Partitions and P_Keys
24 The P_Key for any interface is given by the "pkey" file, and the 24 The P_Key for any interface is given by the "pkey" file, and the
25 main interface for a subinterface is in "parent." 25 main interface for a subinterface is in "parent."
26 26
27Datagram vs Connected modes
28
29 The IPoIB driver supports two modes of operation: datagram and
30 connected. The mode is set and read through an interface's
31 /sys/class/net/<intf name>/mode file.
32
33 In datagram mode, the IB UD (Unreliable Datagram) transport is used
34 and so the interface MTU has is equal to the IB L2 MTU minus the
35 IPoIB encapsulation header (4 bytes). For example, in a typical IB
36 fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
37
38 In connected mode, the IB RC (Reliable Connected) transport is used.
39 Connected mode is to takes advantage of the connected nature of the
40 IB transport and allows an MTU up to the maximal IP packet size of
41 64K, which reduces the number of IP packets needed for handling
42 large UDP datagrams, TCP segments, etc and increases the performance
43 for large messages.
44
45 In connected mode, the interface's UD QP is still used for multicast
46 and communication with peers that don't support connected mode. In
47 this case, RX emulation of ICMP PMTU packets is used to cause the
48 networking stack to use the smaller UD MTU for these neighbours.
49
50Stateless offloads
51
52 If the IB HW supports IPoIB stateless offloads, IPoIB advertises
53 TCP/IP checksum and/or Large Send (LSO) offloading capability to the
54 network stack.
55
56 Large Receive (LRO) offloading is also implemented and may be turned
57 on/off using ethtool calls. Currently LRO is supported only for
58 checksum offload capable devices.
59
60 Stateless offloads are supported only in datagram mode.
61
62Interrupt moderation
63
64 If the underlying IB device supports CQ event moderation, one can
65 use ethtool to set interrupt mitigation parameters and thus reduce
66 the overhead incurred by handling interrupts. The main code path of
67 IPoIB doesn't use events for TX completion signaling so only RX
68 moderation is supported.
69
27Debugging Information 70Debugging Information
28 71
29 By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set 72 By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
@@ -55,3 +98,5 @@ References
55 http://ietf.org/rfc/rfc4391.txt 98 http://ietf.org/rfc/rfc4391.txt
56 IP over InfiniBand (IPoIB) Architecture (RFC 4392) 99 IP over InfiniBand (IPoIB) Architecture (RFC 4392)
57 http://ietf.org/rfc/rfc4392.txt 100 http://ietf.org/rfc/rfc4392.txt
101 IP over InfiniBand: Connected Mode (RFC 4755)
102 http://ietf.org/rfc/rfc4755.txt
diff --git a/Documentation/input/bcm5974.txt b/Documentation/input/bcm5974.txt
new file mode 100644
index 000000000000..5e22dcf6d48d
--- /dev/null
+++ b/Documentation/input/bcm5974.txt
@@ -0,0 +1,65 @@
1BCM5974 Driver (bcm5974)
2------------------------
3 Copyright (C) 2008-2009 Henrik Rydberg <rydberg@euromail.se>
4
5The USB initialization and package decoding was made by Scott Shawcroft as
6part of the touchd user-space driver project:
7 Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com)
8
9The BCM5974 driver is based on the appletouch driver:
10 Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
11 Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net)
12 Copyright (C) 2005 Stelian Pop (stelian@popies.net)
13 Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de)
14 Copyright (C) 2005 Peter Osterlund (petero2@telia.com)
15 Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch)
16 Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch)
17
18This driver adds support for the multi-touch trackpad on the new Apple
19Macbook Air and Macbook Pro laptops. It replaces the appletouch driver on
20those computers, and integrates well with the synaptics driver of the Xorg
21system.
22
23Known to work on Macbook Air, Macbook Pro Penryn and the new unibody
24Macbook 5 and Macbook Pro 5.
25
26Usage
27-----
28
29The driver loads automatically for the supported usb device ids, and
30becomes available both as an event device (/dev/input/event*) and as a
31mouse via the mousedev driver (/dev/input/mice).
32
33USB Race
34--------
35
36The Apple multi-touch trackpads report both mouse and keyboard events via
37different interfaces of the same usb device. This creates a race condition
38with the HID driver, which, if not told otherwise, will find the standard
39HID mouse and keyboard, and claim the whole device. To remedy, the usb
40product id must be listed in the mouse_ignore list of the hid driver.
41
42Debug output
43------------
44
45To ease the development for new hardware version, verbose packet output can
46be switched on with the debug kernel module parameter. The range [1-9]
47yields different levels of verbosity. Example (as root):
48
49echo -n 9 > /sys/module/bcm5974/parameters/debug
50
51tail -f /var/log/debug
52
53echo -n 0 > /sys/module/bcm5974/parameters/debug
54
55Trivia
56------
57
58The driver was developed at the ubuntu forums in June 2008 [1], and now has
59a more permanent home at bitmath.org [2].
60
61Links
62-----
63
64[1] http://ubuntuforums.org/showthread.php?t=840040
65[2] http://http://bitmath.org/code/
diff --git a/Documentation/input/input.txt b/Documentation/input/input.txt
index 686ee9932dff..b93c08442e3c 100644
--- a/Documentation/input/input.txt
+++ b/Documentation/input/input.txt
@@ -278,7 +278,7 @@ struct input_event {
278}; 278};
279 279
280 'time' is the timestamp, it returns the time at which the event happened. 280 'time' is the timestamp, it returns the time at which the event happened.
281Type is for example EV_REL for relative moment, REL_KEY for a keypress or 281Type is for example EV_REL for relative moment, EV_KEY for a keypress or
282release. More types are defined in include/linux/input.h. 282release. More types are defined in include/linux/input.h.
283 283
284 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete 284 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
new file mode 100644
index 000000000000..a12ea3b586e6
--- /dev/null
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -0,0 +1,195 @@
1Multi-touch (MT) Protocol
2-------------------------
3 Copyright (C) 2009 Henrik Rydberg <rydberg@euromail.se>
4
5
6Introduction
7------------
8
9In order to utilize the full power of the new multi-touch devices, a way to
10report detailed finger data to user space is needed. This document
11describes the multi-touch (MT) protocol which allows kernel drivers to
12report details for an arbitrary number of fingers.
13
14
15Usage
16-----
17
18Anonymous finger details are sent sequentially as separate packets of ABS
19events. Only the ABS_MT events are recognized as part of a finger
20packet. The end of a packet is marked by calling the input_mt_sync()
21function, which generates a SYN_MT_REPORT event. This instructs the
22receiver to accept the data for the current finger and prepare to receive
23another. The end of a multi-touch transfer is marked by calling the usual
24input_sync() function. This instructs the receiver to act upon events
25accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new
26set of events/packets.
27
28A set of ABS_MT events with the desired properties is defined. The events
29are divided into categories, to allow for partial implementation. The
30minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and
31ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the
32device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size
33of the approaching finger. Anisotropy and direction may be specified with
34ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. The
35ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a
36finger or a pen or something else. Devices with more granular information
37may specify general shapes as blobs, i.e., as a sequence of rectangular
38shapes grouped together by an ABS_MT_BLOB_ID. Finally, for the few devices
39that currently support it, the ABS_MT_TRACKING_ID event may be used to
40report finger tracking from hardware [5].
41
42Here is what a minimal event sequence for a two-finger touch would look
43like:
44
45 ABS_MT_TOUCH_MAJOR
46 ABS_MT_POSITION_X
47 ABS_MT_POSITION_Y
48 SYN_MT_REPORT
49 ABS_MT_TOUCH_MAJOR
50 ABS_MT_POSITION_X
51 ABS_MT_POSITION_Y
52 SYN_MT_REPORT
53 SYN_REPORT
54
55
56Event Semantics
57---------------
58
59The word "contact" is used to describe a tool which is in direct contact
60with the surface. A finger, a pen or a rubber all classify as contacts.
61
62ABS_MT_TOUCH_MAJOR
63
64The length of the major axis of the contact. The length should be given in
65surface units. If the surface has an X times Y resolution, the largest
66possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4].
67
68ABS_MT_TOUCH_MINOR
69
70The length, in surface units, of the minor axis of the contact. If the
71contact is circular, this event can be omitted [4].
72
73ABS_MT_WIDTH_MAJOR
74
75The length, in surface units, of the major axis of the approaching
76tool. This should be understood as the size of the tool itself. The
77orientation of the contact and the approaching tool are assumed to be the
78same [4].
79
80ABS_MT_WIDTH_MINOR
81
82The length, in surface units, of the minor axis of the approaching
83tool. Omit if circular [4].
84
85The above four values can be used to derive additional information about
86the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates
87the notion of pressure. The fingers of the hand and the palm all have
88different characteristic widths [1].
89
90ABS_MT_ORIENTATION
91
92The orientation of the ellipse. The value should describe a signed quarter
93of a revolution clockwise around the touch center. The signed value range
94is arbitrary, but zero should be returned for a finger aligned along the Y
95axis of the surface, a negative value when finger is turned to the left, and
96a positive value when finger turned to the right. When completely aligned with
97the X axis, the range max should be returned. Orientation can be omitted
98if the touching object is circular, or if the information is not available
99in the kernel driver. Partial orientation support is possible if the device
100can distinguish between the two axis, but not (uniquely) any values in
101between. In such cases, the range of ABS_MT_ORIENTATION should be [0, 1]
102[4].
103
104ABS_MT_POSITION_X
105
106The surface X coordinate of the center of the touching ellipse.
107
108ABS_MT_POSITION_Y
109
110The surface Y coordinate of the center of the touching ellipse.
111
112ABS_MT_TOOL_TYPE
113
114The type of approaching tool. A lot of kernel drivers cannot distinguish
115between different tool types, such as a finger or a pen. In such cases, the
116event should be omitted. The protocol currently supports MT_TOOL_FINGER and
117MT_TOOL_PEN [2].
118
119ABS_MT_BLOB_ID
120
121The BLOB_ID groups several packets together into one arbitrarily shaped
122contact. This is a low-level anonymous grouping, and should not be confused
123with the high-level trackingID [5]. Most kernel drivers will not have blob
124capability, and can safely omit the event.
125
126ABS_MT_TRACKING_ID
127
128The TRACKING_ID identifies an initiated contact throughout its life cycle
129[5]. There are currently only a few devices that support it, so this event
130should normally be omitted.
131
132
133Event Computation
134-----------------
135
136The flora of different hardware unavoidably leads to some devices fitting
137better to the MT protocol than others. To simplify and unify the mapping,
138this section gives recipes for how to compute certain events.
139
140For devices reporting contacts as rectangular shapes, signed orientation
141cannot be obtained. Assuming X and Y are the lengths of the sides of the
142touching rectangle, here is a simple formula that retains the most
143information possible:
144
145 ABS_MT_TOUCH_MAJOR := max(X, Y)
146 ABS_MT_TOUCH_MINOR := min(X, Y)
147 ABS_MT_ORIENTATION := bool(X > Y)
148
149The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that
150the device can distinguish between a finger along the Y axis (0) and a
151finger along the X axis (1).
152
153
154Finger Tracking
155---------------
156
157The kernel driver should generate an arbitrary enumeration of the set of
158anonymous contacts currently on the surface. The order in which the packets
159appear in the event stream is not important.
160
161The process of finger tracking, i.e., to assign a unique trackingID to each
162initiated contact on the surface, is left to user space; preferably the
163multi-touch X driver [3]. In that driver, the trackingID stays the same and
164unique until the contact vanishes (when the finger leaves the surface). The
165problem of assigning a set of anonymous fingers to a set of identified
166fingers is a euclidian bipartite matching problem at each event update, and
167relies on a sufficiently rapid update rate.
168
169There are a few devices that support trackingID in hardware. User space can
170make use of these native identifiers to reduce bandwidth and cpu usage.
171
172
173Notes
174-----
175
176In order to stay compatible with existing applications, the data
177reported in a finger packet must not be recognized as single-touch
178events. In addition, all finger data must bypass input filtering,
179since subsequent events of the same type refer to different fingers.
180
181The first kernel driver to utilize the MT protocol is the bcm5974 driver,
182where examples can be found.
183
184[1] With the extension ABS_MT_APPROACH_X and ABS_MT_APPROACH_Y, the
185difference between the contact position and the approaching tool position
186could be used to derive tilt.
187[2] The list can of course be extended.
188[3] The multi-touch X driver is currently in the prototyping stage. At the
189time of writing (April 2009), the MT protocol is not yet merged, and the
190prototype implements finger matching, basic mouse support and two-finger
191scrolling. The project aims at improving the quality of current multi-touch
192functionality available in the Synaptics X driver, and in addition
193implement more advanced gestures.
194[4] See the section on event computation.
195[5] See the section on finger tracking.
diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt
new file mode 100644
index 000000000000..3a6aec40c0b0
--- /dev/null
+++ b/Documentation/input/rotary-encoder.txt
@@ -0,0 +1,108 @@
1rotary-encoder - a generic driver for GPIO connected devices
2Daniel Mack <daniel@caiaq.de>, Feb 2009
3
40. Function
5-----------
6
7Rotary encoders are devices which are connected to the CPU or other
8peripherals with two wires. The outputs are phase-shifted by 90 degrees
9and by triggering on falling and rising edges, the turn direction can
10be determined.
11
12The phase diagram of these two outputs look like this:
13
14 _____ _____ _____
15 | | | | | |
16 Channel A ____| |_____| |_____| |____
17
18 : : : : : : : : : : : :
19 __ _____ _____ _____
20 | | | | | | |
21 Channel B |_____| |_____| |_____| |__
22
23 : : : : : : : : : : : :
24 Event a b c d a b c d a b c d
25
26 |<-------->|
27 one step
28
29
30For more information, please see
31 http://en.wikipedia.org/wiki/Rotary_encoder
32
33
341. Events / state machine
35-------------------------
36
37a) Rising edge on channel A, channel B in low state
38 This state is used to recognize a clockwise turn
39
40b) Rising edge on channel B, channel A in high state
41 When entering this state, the encoder is put into 'armed' state,
42 meaning that there it has seen half the way of a one-step transition.
43
44c) Falling edge on channel A, channel B in high state
45 This state is used to recognize a counter-clockwise turn
46
47d) Falling edge on channel B, channel A in low state
48 Parking position. If the encoder enters this state, a full transition
49 should have happend, unless it flipped back on half the way. The
50 'armed' state tells us about that.
51
522. Platform requirements
53------------------------
54
55As there is no hardware dependent call in this driver, the platform it is
56used with must support gpiolib. Another requirement is that IRQs must be
57able to fire on both edges.
58
59
603. Board integration
61--------------------
62
63To use this driver in your system, register a platform_device with the
64name 'rotary-encoder' and associate the IRQs and some specific platform
65data with it.
66
67struct rotary_encoder_platform_data is declared in
68include/linux/rotary-encoder.h and needs to be filled with the number of
69steps the encoder has and can carry information about externally inverted
70signals (because of an inverting buffer or other reasons). The encoder
71can be set up to deliver input information as either an absolute or relative
72axes. For relative axes the input event returns +/-1 for each step. For
73absolute axes the position of the encoder can either roll over between zero
74and the number of steps or will clamp at the maximum and zero depending on
75the configuration.
76
77Because GPIO to IRQ mapping is platform specific, this information must
78be given in seperately to the driver. See the example below.
79
80---------<snip>---------
81
82/* board support file example */
83
84#include <linux/input.h>
85#include <linux/rotary_encoder.h>
86
87#define GPIO_ROTARY_A 1
88#define GPIO_ROTARY_B 2
89
90static struct rotary_encoder_platform_data my_rotary_encoder_info = {
91 .steps = 24,
92 .axis = ABS_X,
93 .relative_axis = false,
94 .rollover = false,
95 .gpio_a = GPIO_ROTARY_A,
96 .gpio_b = GPIO_ROTARY_B,
97 .inverted_a = 0,
98 .inverted_b = 0,
99};
100
101static struct platform_device rotary_encoder_device = {
102 .name = "rotary-encoder",
103 .id = 0,
104 .dev = {
105 .platform_data = &my_rotary_encoder_info,
106 }
107};
108
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index f1d639903325..7bb0d934b6d8 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -122,10 +122,8 @@ Code Seq# Include File Comments
122'c' 00-7F linux/coda.h conflict! 122'c' 00-7F linux/coda.h conflict!
123'c' 80-9F arch/s390/include/asm/chsc.h 123'c' 80-9F arch/s390/include/asm/chsc.h
124'd' 00-FF linux/char/drm/drm/h conflict! 124'd' 00-FF linux/char/drm/drm/h conflict!
125'd' 00-DF linux/video_decoder.h conflict!
126'd' F0-FF linux/digi1.h 125'd' F0-FF linux/digi1.h
127'e' all linux/digi1.h conflict! 126'e' all linux/digi1.h conflict!
128'e' 00-1F linux/video_encoder.h conflict!
129'e' 00-1F net/irda/irtty.h conflict! 127'e' 00-1F net/irda/irtty.h conflict!
130'f' 00-1F linux/ext2_fs.h 128'f' 00-1F linux/ext2_fs.h
131'h' 00-7F Charon filesystem 129'h' 00-7F Charon filesystem
@@ -151,6 +149,8 @@ Code Seq# Include File Comments
151'p' 40-7F linux/nvram.h 149'p' 40-7F linux/nvram.h
152'p' 80-9F user-space parport 150'p' 80-9F user-space parport
153 <mailto:tim@cyberelk.net> 151 <mailto:tim@cyberelk.net>
152'p' a1-a4 linux/pps.h LinuxPPS
153 <mailto:giometti@linux.it>
154'q' 00-1F linux/serio.h 154'q' 00-1F linux/serio.h
155'q' 80-FF Internet PhoneJACK, Internet LineJACK 155'q' 80-FF Internet PhoneJACK, Internet LineJACK
156 <http://www.quicknet.net> 156 <http://www.quicknet.net>
diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX
index 9fee5f2e5c62..e87e336f590e 100644
--- a/Documentation/isdn/00-INDEX
+++ b/Documentation/isdn/00-INDEX
@@ -2,42 +2,49 @@
2 - this file (info on ISDN implementation for Linux) 2 - this file (info on ISDN implementation for Linux)
3CREDITS 3CREDITS
4 - list of the kind folks that brought you this stuff. 4 - list of the kind folks that brought you this stuff.
5HiSax.cert
6 - information about the ITU approval certification of the HiSax driver.
5INTERFACE 7INTERFACE
6 - description of Linklevel and Hardwarelevel ISDN interface. 8 - description of isdn4linux Link Level and Hardware Level interfaces.
9INTERFACE.fax
10 - description of the fax subinterface of isdn4linux.
11INTERFACE.CAPI
12 - description of kernel CAPI Link Level to Hardware Level interface.
7README 13README
8 - general info on what you need and what to do for Linux ISDN. 14 - general info on what you need and what to do for Linux ISDN.
9README.FAQ 15README.FAQ
10 - general info for FAQ. 16 - general info for FAQ.
17README.HiSax
18 - info on the HiSax driver which replaces the old teles.
19README.act2000
20 - info on driver for IBM ACT-2000 card.
11README.audio 21README.audio
12 - info for running audio over ISDN. 22 - info for running audio over ISDN.
23README.avmb1
24 - info on driver for AVM-B1 ISDN card.
25README.concap
26 - info on "CONCAP" encapsulation protocol interface used for X.25.
27README.diversion
28 - info on module for isdn diversion services.
13README.fax 29README.fax
14 - info for using Fax over ISDN. 30 - info for using Fax over ISDN.
15README.icn 31README.gigaset
16 - info on the ICN-ISDN-card and its driver. 32 - info on the drivers for Siemens Gigaset ISDN adapters
17README.HiSax
18 - info on the HiSax driver which replaces the old teles.
19README.hfc-pci 33README.hfc-pci
20 - info on hfc-pci based cards. 34 - info on hfc-pci based cards.
35README.hysdn
36 - info on driver for Hypercope active HYSDN cards
37README.icn
38 - info on the ICN-ISDN-card and its driver.
39README.mISDN
40 - info on the Modular ISDN subsystem (mISDN)
21README.pcbit 41README.pcbit
22 - info on the PCBIT-D ISDN adapter and driver. 42 - info on the PCBIT-D ISDN adapter and driver.
43README.sc
44 - info on driver for Spellcaster cards.
23README.syncppp 45README.syncppp
24 - info on running Sync PPP over ISDN. 46 - info on running Sync PPP over ISDN.
47README.x25
48 - info for running X.25 over ISDN.
25syncPPP.FAQ 49syncPPP.FAQ
26 - frequently asked questions about running PPP over ISDN. 50 - frequently asked questions about running PPP over ISDN.
27README.avmb1
28 - info on driver for AVM-B1 ISDN card.
29README.act2000
30 - info on driver for IBM ACT-2000 card.
31README.eicon
32 - info on driver for Eicon active cards.
33README.concap
34 - info on "CONCAP" encapsulation protocol interface used for X.25.
35README.diversion
36 - info on module for isdn diversion services.
37README.sc
38 - info on driver for Spellcaster cards.
39README.x25
40 _ info for running X.25 over ISDN.
41README.hysdn
42 - info on driver for Hypercope active HYSDN cards
43
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
new file mode 100644
index 000000000000..686e107923ec
--- /dev/null
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -0,0 +1,299 @@
1Kernel CAPI Interface to Hardware Drivers
2-----------------------------------------
3
41. Overview
5
6From the CAPI 2.0 specification:
7COMMON-ISDN-API (CAPI) is an application programming interface standard used
8to access ISDN equipment connected to basic rate interfaces (BRI) and primary
9rate interfaces (PRI).
10
11Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI
12hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI
13lingo) with Kernel CAPI to indicate their readiness to provide their service
14to CAPI applications. CAPI applications also register with Kernel CAPI,
15requesting association with a CAPI device. Kernel CAPI then dispatches the
16application registration to an available device, forwarding it to the
17corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both
18directions between the application and the hardware driver.
19
20Format and semantics of CAPI messages are specified in the CAPI 2.0 standard.
21This standard is freely available from http://www.capi.org.
22
23
242. Driver and Device Registration
25
26CAPI drivers optionally register themselves with Kernel CAPI by calling the
27Kernel CAPI function register_capi_driver() with a pointer to a struct
28capi_driver. This structure must be filled with the name and revision of the
29driver, and optionally a pointer to a callback function, add_card(). The
30registration can be revoked by calling the function unregister_capi_driver()
31with a pointer to the same struct capi_driver.
32
33CAPI drivers must register each of the ISDN devices they control with Kernel
34CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a
35struct capi_ctr before they can be used. This structure must be filled with
36the names of the driver and controller, and a number of callback function
37pointers which are subsequently used by Kernel CAPI for communicating with the
38driver. The registration can be revoked by calling the function
39detach_capi_ctr() with a pointer to the same struct capi_ctr.
40
41Before the device can be actually used, the driver must fill in the device
42information fields 'manu', 'version', 'profile' and 'serial' in the capi_ctr
43structure of the device, and signal its readiness by calling capi_ctr_ready().
44From then on, Kernel CAPI may call the registered callback functions for the
45device.
46
47If the device becomes unusable for any reason (shutdown, disconnect ...), the
48driver has to call capi_ctr_down(). This will prevent further calls to the
49callback functions by Kernel CAPI.
50
51
523. Application Registration and Communication
53
54Kernel CAPI forwards registration requests from applications (calls to CAPI
55operation CAPI_REGISTER) to an appropriate hardware driver by calling its
56register_appl() callback function. A unique Application ID (ApplID, u16) is
57allocated by Kernel CAPI and passed to register_appl() along with the
58parameter structure provided by the application. This is analogous to the
59open() operation on regular files or character devices.
60
61After a successful return from register_appl(), CAPI messages from the
62application may be passed to the driver for the device via calls to the
63send_message() callback function. The CAPI message to send is stored in the
64data portion of an skb. Conversely, the driver may call Kernel CAPI's
65capi_ctr_handle_message() function to pass a received CAPI message to Kernel
66CAPI for forwarding to an application, specifying its ApplID.
67
68Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
69forwarded as calls to the release_appl() callback function, passing the same
70ApplID as with register_appl(). After return from release_appl(), no CAPI
71messages for that application may be passed to or from the device anymore.
72
73
744. Data Structures
75
764.1 struct capi_driver
77
78This structure describes a Kernel CAPI driver itself. It is used in the
79register_capi_driver() and unregister_capi_driver() functions, and contains
80the following non-private fields, all to be set by the driver before calling
81register_capi_driver():
82
83char name[32]
84 the name of the driver, as a zero-terminated ASCII string
85char revision[32]
86 the revision number of the driver, as a zero-terminated ASCII string
87int (*add_card)(struct capi_driver *driver, capicardparams *data)
88 a callback function pointer (may be NULL)
89
90
914.2 struct capi_ctr
92
93This structure describes an ISDN device (controller) handled by a Kernel CAPI
94driver. After registration via the attach_capi_ctr() function it is passed to
95all controller specific lower layer interface and callback functions to
96identify the controller to operate on.
97
98It contains the following non-private fields:
99
100- to be set by the driver before calling attach_capi_ctr():
101
102struct module *owner
103 pointer to the driver module owning the device
104
105void *driverdata
106 an opaque pointer to driver specific data, not touched by Kernel CAPI
107
108char name[32]
109 the name of the controller, as a zero-terminated ASCII string
110
111char *driver_name
112 the name of the driver, as a zero-terminated ASCII string
113
114int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata)
115 (optional) pointer to a callback function for sending firmware and
116 configuration data to the device
117 Return value: 0 on success, error code on error
118 Called in process context.
119
120void (*reset_ctr)(struct capi_ctr *ctrlr)
121 (optional) pointer to a callback function for performing a reset on
122 the device, releasing all registered applications
123 Called in process context.
124
125void (*register_appl)(struct capi_ctr *ctrlr, u16 applid,
126 capi_register_params *rparam)
127void (*release_appl)(struct capi_ctr *ctrlr, u16 applid)
128 pointers to callback functions for registration and deregistration of
129 applications with the device
130 Calls to these functions are serialized by Kernel CAPI so that only
131 one call to any of them is active at any time.
132
133u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
134 pointer to a callback function for sending a CAPI message to the
135 device
136 Return value: CAPI error code
137 If the method returns 0 (CAPI_NOERROR) the driver has taken ownership
138 of the skb and the caller may no longer access it. If it returns a
139 non-zero (error) value then ownership of the skb returns to the caller
140 who may reuse or free it.
141 The return value should only be used to signal problems with respect
142 to accepting or queueing the message. Errors occurring during the
143 actual processing of the message should be signaled with an
144 appropriate reply message.
145 Calls to this function are not serialized by Kernel CAPI, ie. it must
146 be prepared to be re-entered.
147
148char *(*procinfo)(struct capi_ctr *ctrlr)
149 pointer to a callback function returning the entry for the device in
150 the CAPI controller info table, /proc/capi/controller
151
152read_proc_t *ctr_read_proc
153 pointer to the read_proc callback function for the device's proc file
154 system entry, /proc/capi/controllers/<n>; will be called with a
155 pointer to the device's capi_ctr structure as the last (data) argument
156
157Note: Callback functions are never called in interrupt context.
158
159- to be filled in before calling capi_ctr_ready():
160
161u8 manu[CAPI_MANUFACTURER_LEN]
162 value to return for CAPI_GET_MANUFACTURER
163
164capi_version version
165 value to return for CAPI_GET_VERSION
166
167capi_profile profile
168 value to return for CAPI_GET_PROFILE
169
170u8 serial[CAPI_SERIAL_LEN]
171 value to return for CAPI_GET_SERIAL
172
173
1744.3 The _cmsg Structure
175
176(declared in <linux/isdn/capiutil.h>)
177
178The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
179accessible form. It contains members for all possible CAPI 2.0 parameters, of
180which only those appearing in the message type currently being processed are
181actually used. Unused members should be set to zero.
182
183Members are named after the CAPI 2.0 standard names of the parameters they
184represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
185types are:
186
187u8 for CAPI parameters of type 'byte'
188
189u16 for CAPI parameters of type 'word'
190
191u32 for CAPI parameters of type 'dword'
192
193_cstruct for CAPI parameters of type 'struct' not containing any
194 variably-sized (struct) subparameters (eg. 'Called Party Number')
195 The member is a pointer to a buffer containing the parameter in
196 CAPI encoding (length + content). It may also be NULL, which will
197 be taken to represent an empty (zero length) parameter.
198
199_cmstruct for CAPI parameters of type 'struct' containing 'struct'
200 subparameters ('Additional Info' and 'B Protocol')
201 The representation is a single byte containing one of the values:
202 CAPI_DEFAULT: the parameter is empty
203 CAPI_COMPOSE: the values of the subparameters are stored
204 individually in the corresponding _cmsg structure members
205
206Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
207messages between their transport encoding described in the CAPI 2.0 standard
208and their _cmsg structure representation. Note that capi_cmsg2message() does
209not know or check the size of its destination buffer. The caller must make
210sure it is big enough to accomodate the resulting CAPI message.
211
212
2135. Lower Layer Interface Functions
214
215(declared in <linux/isdn/capilli.h>)
216
217void register_capi_driver(struct capi_driver *drvr)
218void unregister_capi_driver(struct capi_driver *drvr)
219 register/unregister a driver with Kernel CAPI
220
221int attach_capi_ctr(struct capi_ctr *ctrlr)
222int detach_capi_ctr(struct capi_ctr *ctrlr)
223 register/unregister a device (controller) with Kernel CAPI
224
225void capi_ctr_ready(struct capi_ctr *ctrlr)
226void capi_ctr_down(struct capi_ctr *ctrlr)
227 signal controller ready/not ready
228
229void capi_ctr_suspend_output(struct capi_ctr *ctrlr)
230void capi_ctr_resume_output(struct capi_ctr *ctrlr)
231 signal suspend/resume
232
233void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid,
234 struct sk_buff *skb)
235 pass a received CAPI message to Kernel CAPI
236 for forwarding to the specified application
237
238
2396. Helper Functions and Macros
240
241Library functions (from <linux/isdn/capilli.h>):
242
243void capilib_new_ncci(struct list_head *head, u16 applid,
244 u32 ncci, u32 winsize)
245void capilib_free_ncci(struct list_head *head, u16 applid, u32 ncci)
246void capilib_release_appl(struct list_head *head, u16 applid)
247void capilib_release(struct list_head *head)
248void capilib_data_b3_conf(struct list_head *head, u16 applid,
249 u32 ncci, u16 msgid)
250u16 capilib_data_b3_req(struct list_head *head, u16 applid,
251 u32 ncci, u16 msgid)
252
253
254Macros to extract/set element values from/in a CAPI message header
255(from <linux/isdn/capiutil.h>):
256
257Get Macro Set Macro Element (Type)
258
259CAPIMSG_LEN(m) CAPIMSG_SETLEN(m, len) Total Length (u16)
260CAPIMSG_APPID(m) CAPIMSG_SETAPPID(m, applid) ApplID (u16)
261CAPIMSG_COMMAND(m) CAPIMSG_SETCOMMAND(m,cmd) Command (u8)
262CAPIMSG_SUBCOMMAND(m) CAPIMSG_SETSUBCOMMAND(m, cmd) Subcommand (u8)
263CAPIMSG_CMD(m) - Command*256
264 + Subcommand (u16)
265CAPIMSG_MSGID(m) CAPIMSG_SETMSGID(m, msgid) Message Number (u16)
266
267CAPIMSG_CONTROL(m) CAPIMSG_SETCONTROL(m, contr) Controller/PLCI/NCCI
268 (u32)
269CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16)
270
271
272Library functions for working with _cmsg structures
273(from <linux/isdn/capiutil.h>):
274
275unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg)
276 Assembles a CAPI 2.0 message from the parameters in *cmsg, storing the
277 result in *msg.
278
279unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg)
280 Disassembles the CAPI 2.0 message in *msg, storing the parameters in
281 *cmsg.
282
283unsigned capi_cmsg_header(_cmsg *cmsg, u16 ApplId, u8 Command, u8 Subcommand,
284 u16 Messagenumber, u32 Controller)
285 Fills the header part and address field of the _cmsg structure *cmsg
286 with the given values, zeroing the remainder of the structure so only
287 parameters with non-default values need to be changed before sending
288 the message.
289
290void capi_cmsg_answer(_cmsg *cmsg)
291 Sets the low bit of the Subcommand field in *cmsg, thereby converting
292 _REQ to _CONF and _IND to _RESP.
293
294char *capi_cmd2str(u8 Command, u8 Subcommand)
295 Returns the CAPI 2.0 message name corresponding to the given command
296 and subcommand values, as a static ASCII string. The return value may
297 be NULL if the command/subcommand is not one of those defined in the
298 CAPI 2.0 standard.
299
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 55b2852904a4..f9963103ae3d 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -61,24 +61,28 @@ GigaSet 307x Device Driver
61 --------------------- 61 ---------------------
622.1. Modules 622.1. Modules
63 ------- 63 -------
64 To get the device working, you have to load the proper kernel module. You 64 For the devices to work, the proper kernel modules have to be loaded.
65 can do this using 65 This normally happens automatically when the system detects the USB
66 modprobe modulename 66 device (base, M105) or when the line discipline is attached (M101). It
67 where modulename is ser_gigaset (M101), usb_gigaset (M105), or 67 can also be triggered manually using the modprobe(8) command, for example
68 bas_gigaset (direct USB connection to the base). 68 for troubleshooting or to pass module parameters.
69 69
70 The module ser_gigaset provides a serial line discipline N_GIGASET_M101 70 The module ser_gigaset provides a serial line discipline N_GIGASET_M101
71 which drives the device through the regular serial line driver. To use it, 71 which drives the device through the regular serial line driver. It must
72 run the Gigaset M101 daemon "gigasetm101d" (also available from 72 be attached to the serial line to which the M101 is connected with the
73 http://sourceforge.net/projects/gigaset307x/) with the device file of the 73 ldattach(8) command (requires util-linux-ng release 2.14 or later), for
74 RS232 port to the M101 as an argument, for example: 74 example:
75 gigasetm101d /dev/ttyS1 75 ldattach GIGASET_M101 /dev/ttyS1
76 This will open the device file, set its line discipline to N_GIGASET_M101, 76 This will open the device file, attach the line discipline to it, and
77 and then sleep in the background, keeping the device open so that the 77 then sleep in the background, keeping the device open so that the line
78 line discipline remains active. To deactivate it, kill the daemon, for 78 discipline remains active. To deactivate it, kill the daemon, for example
79 example with 79 with
80 killall gigasetm101d 80 killall ldattach
81 before disconnecting the device. 81 before disconnecting the device. To have this happen automatically at
82 system startup/shutdown on an LSB compatible system, create and activate
83 an appropriate LSB startup script /etc/init.d/gigaset. (The init name
84 'gigaset' is officially assigned to this project by LANANA.)
85 Alternatively, just add the 'ldattach' command line to /etc/rc.local.
82 86
832.2. Device nodes for user space programs 872.2. Device nodes for user space programs
84 ------------------------------------ 88 ------------------------------------
@@ -145,10 +149,8 @@ GigaSet 307x Device Driver
145 configuration files and chat scripts in the gigaset-VERSION/ppp directory 149 configuration files and chat scripts in the gigaset-VERSION/ppp directory
146 in the driver packages from http://sourceforge.net/projects/gigaset307x/. 150 in the driver packages from http://sourceforge.net/projects/gigaset307x/.
147 Please note that the USB drivers are not able to change the state of the 151 Please note that the USB drivers are not able to change the state of the
148 control lines (the M105 driver can be configured to use some undocumented 152 control lines. This means you must use "Stupid Mode" if you are using
149 control requests, if you really need the control lines, though). This means 153 wvdial or you should use the nocrtscts option of pppd.
150 you must use "Stupid Mode" if you are using wvdial or you should use the
151 nocrtscts option of pppd.
152 You must also assure that the ppp_async module is loaded with the parameter 154 You must also assure that the ppp_async module is loaded with the parameter
153 flag_time=0. You can do this e.g. by adding a line like 155 flag_time=0. You can do this e.g. by adding a line like
154 156
@@ -186,19 +188,19 @@ GigaSet 307x Device Driver
186 You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode 188 You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode
187 setting (ttyGxy is ttyGU0 or ttyGB0). 189 setting (ttyGxy is ttyGU0 or ttyGB0).
188 190
1892.6. M105 Undocumented USB Requests 1912.6. Unregistered Wireless Devices (M101/M105)
190 ------------------------------ 192 -----------------------------------------
191 193 The main purpose of the ser_gigaset and usb_gigaset drivers is to allow
192 The Gigaset M105 USB data box understands a couple of useful, but 194 the M101 and M105 wireless devices to be used as ISDN devices for ISDN
193 undocumented USB commands. These requests are not used in normal 195 connections through a Gigaset base. Therefore they assume that the device
194 operation (for wireless access to the base), but are needed for access 196 is registered to a DECT base.
195 to the M105's own configuration mode (registration to the base, baudrate 197
196 and line format settings, device status queries) via the gigacontr 198 If the M101/M105 device is not registered to a base, initialization of
197 utility. Their use is disabled in the driver by default for safety 199 the device fails, and a corresponding error message is logged by the
198 reasons but can be enabled by setting the kernel configuration option 200 driver. In that situation, a restricted set of functions is available
199 "Support for undocumented USB requests" (GIGASET_UNDOCREQ) to "Y" and 201 which includes, in particular, those necessary for registering the device
200 recompiling. 202 to a base or for switching it between Fixed Part and Portable Part
201 203 modes.
202 204
2033. Troubleshooting 2053. Troubleshooting
204 --------------- 206 ---------------
@@ -228,6 +230,14 @@ GigaSet 307x Device Driver
228 Solution: 230 Solution:
229 Select Unimodem mode for all DECT data adapters. (see section 2.4.) 231 Select Unimodem mode for all DECT data adapters. (see section 2.4.)
230 232
233 Problem:
234 Messages like this:
235 usb_gigaset 3-2:1.0: Could not initialize the device.
236 appear in your syslog.
237 Solution:
238 Check whether your M10x wireless device is correctly registered to the
239 Gigaset base. (see section 2.6.)
240
2313.2. Telling the driver to provide more information 2413.2. Telling the driver to provide more information
232 ---------------------------------------------- 242 ----------------------------------------------
233 Building the driver with the "Gigaset debugging" kernel configuration 243 Building the driver with the "Gigaset debugging" kernel configuration
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist
index 6c42e071d723..2df4576f1173 100644
--- a/Documentation/ja_JP/SubmitChecklist
+++ b/Documentation/ja_JP/SubmitChecklist
@@ -75,7 +75,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
75 ビルドした上、動作確認を行ってください。 75 ビルドした上、動作確認を行ってください。
76 76
7714: もしパッチがディスクのI/O性能などに影響を与えるようであれば、 7714: もしパッチがディスクのI/O性能などに影響を与えるようであれば、
78 'CONFIG_LBD'オプションを有効にした場合と無効にした場合の両方で 78 'CONFIG_LBDAF'オプションを有効にした場合と無効にした場合の両方で
79 テストを実施してみてください。 79 テストを実施してみてください。
80 80
8115: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。 8115: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
index 26a7c0a93193..849b5e56d06f 100644
--- a/Documentation/kbuild/kconfig.txt
+++ b/Documentation/kbuild/kconfig.txt
@@ -35,48 +35,26 @@ new .config files to see the differences:
35 35
36(Yes, we need something better here.) 36(Yes, we need something better here.)
37 37
38
39======================================================================
40menuconfig
41--------------------------------------------------
42
43SEARCHING for CONFIG symbols
44
45Searching in menuconfig:
46
47 The Search function searches for kernel configuration symbol
48 names, so you have to know something close to what you are
49 looking for.
50
51 Example:
52 /hotplug
53 This lists all config symbols that contain "hotplug",
54 e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
55
56 For search help, enter / followed TAB-TAB-TAB (to highlight
57 <Help>) and Enter. This will tell you that you can also use
58 regular expressions (regexes) in the search string, so if you
59 are not interested in MEMORY_HOTPLUG, you could try
60
61 /^hotplug
62
63
64______________________________________________________________________ 38______________________________________________________________________
65Color Themes for 'menuconfig' 39Environment variables for '*config'
66 40
67It is possible to select different color themes using the variable 41KCONFIG_CONFIG
68MENUCONFIG_COLOR. To select a theme use: 42--------------------------------------------------
43This environment variable can be used to specify a default kernel config
44file name to override the default name of ".config".
69 45
70 make MENUCONFIG_COLOR=<theme> menuconfig 46KCONFIG_OVERWRITECONFIG
47--------------------------------------------------
48If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
49break symlinks when .config is a symlink to somewhere else.
71 50
72Available themes are: 51KCONFIG_NOTIMESTAMP
73 mono => selects colors suitable for monochrome displays 52--------------------------------------------------
74 blackbg => selects a color scheme with black background 53If this environment variable exists and is non-null, the timestamp line
75 classic => theme with blue background. The classic look 54in generated .config files is omitted.
76 bluetitle => a LCD friendly version of classic. (default)
77 55
78______________________________________________________________________ 56______________________________________________________________________
79Environment variables in 'menuconfig' 57Environment variables for '{allyes/allmod/allno/rand}config'
80 58
81KCONFIG_ALLCONFIG 59KCONFIG_ALLCONFIG
82-------------------------------------------------- 60--------------------------------------------------
@@ -95,8 +73,7 @@ values.
95This enables you to create "miniature" config (miniconfig) or custom 73This enables you to create "miniature" config (miniconfig) or custom
96config files containing just the config symbols that you are interested 74config files containing just the config symbols that you are interested
97in. Then the kernel config system generates the full .config file, 75in. Then the kernel config system generates the full .config file,
98including dependencies of your miniconfig file, based on the miniconfig 76including symbols of your miniconfig file.
99file.
100 77
101This 'KCONFIG_ALLCONFIG' file is a config file which contains 78This 'KCONFIG_ALLCONFIG' file is a config file which contains
102(usually a subset of all) preset config symbols. These variable 79(usually a subset of all) preset config symbols. These variable
@@ -113,26 +90,14 @@ These examples will disable most options (allnoconfig) but enable or
113disable the options that are explicitly listed in the specified 90disable the options that are explicitly listed in the specified
114mini-config files. 91mini-config files.
115 92
93______________________________________________________________________
94Environment variables for 'silentoldconfig'
95
116KCONFIG_NOSILENTUPDATE 96KCONFIG_NOSILENTUPDATE
117-------------------------------------------------- 97--------------------------------------------------
118If this variable has a non-blank value, it prevents silent kernel 98If this variable has a non-blank value, it prevents silent kernel
119config udpates (requires explicit updates). 99config udpates (requires explicit updates).
120 100
121KCONFIG_CONFIG
122--------------------------------------------------
123This environment variable can be used to specify a default kernel config
124file name to override the default name of ".config".
125
126KCONFIG_OVERWRITECONFIG
127--------------------------------------------------
128If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
129break symlinks when .config is a symlink to somewhere else.
130
131KCONFIG_NOTIMESTAMP
132--------------------------------------------------
133If this environment variable exists and is non-null, the timestamp line
134in generated .config files is omitted.
135
136KCONFIG_AUTOCONFIG 101KCONFIG_AUTOCONFIG
137-------------------------------------------------- 102--------------------------------------------------
138This environment variable can be set to specify the path & name of the 103This environment variable can be set to specify the path & name of the
@@ -143,15 +108,54 @@ KCONFIG_AUTOHEADER
143This environment variable can be set to specify the path & name of the 108This environment variable can be set to specify the path & name of the
144"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". 109"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h".
145 110
111
112======================================================================
113menuconfig
114--------------------------------------------------
115
116SEARCHING for CONFIG symbols
117
118Searching in menuconfig:
119
120 The Search function searches for kernel configuration symbol
121 names, so you have to know something close to what you are
122 looking for.
123
124 Example:
125 /hotplug
126 This lists all config symbols that contain "hotplug",
127 e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
128
129 For search help, enter / followed TAB-TAB-TAB (to highlight
130 <Help>) and Enter. This will tell you that you can also use
131 regular expressions (regexes) in the search string, so if you
132 are not interested in MEMORY_HOTPLUG, you could try
133
134 /^hotplug
135
146______________________________________________________________________ 136______________________________________________________________________
147menuconfig User Interface Options 137User interface options for 'menuconfig'
148---------------------------------------------------------------------- 138
139MENUCONFIG_COLOR
140--------------------------------------------------
141It is possible to select different color themes using the variable
142MENUCONFIG_COLOR. To select a theme use:
143
144 make MENUCONFIG_COLOR=<theme> menuconfig
145
146Available themes are:
147 mono => selects colors suitable for monochrome displays
148 blackbg => selects a color scheme with black background
149 classic => theme with blue background. The classic look
150 bluetitle => a LCD friendly version of classic. (default)
151
149MENUCONFIG_MODE 152MENUCONFIG_MODE
150-------------------------------------------------- 153--------------------------------------------------
151This mode shows all sub-menus in one large tree. 154This mode shows all sub-menus in one large tree.
152 155
153Example: 156Example:
154 MENUCONFIG_MODE=single_menu make menuconfig 157 make MENUCONFIG_MODE=single_menu menuconfig
158
155 159
156====================================================================== 160======================================================================
157xconfig 161xconfig
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 51104f9194a5..d76cfd8712e1 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -40,10 +40,16 @@ This document describes the Linux kernel Makefiles.
40 --- 6.7 Custom kbuild commands 40 --- 6.7 Custom kbuild commands
41 --- 6.8 Preprocessing linker scripts 41 --- 6.8 Preprocessing linker scripts
42 42
43 === 7 Kbuild Variables 43 === 7 Kbuild syntax for exported headers
44 === 8 Makefile language 44 --- 7.1 header-y
45 === 9 Credits 45 --- 7.2 objhdr-y
46 === 10 TODO 46 --- 7.3 destination-y
47 --- 7.4 unifdef-y (deprecated)
48
49 === 8 Kbuild Variables
50 === 9 Makefile language
51 === 10 Credits
52 === 11 TODO
47 53
48=== 1 Overview 54=== 1 Overview
49 55
@@ -310,6 +316,16 @@ more details, with real examples.
310 #arch/m68k/fpsp040/Makefile 316 #arch/m68k/fpsp040/Makefile
311 ldflags-y := -x 317 ldflags-y := -x
312 318
319 subdir-ccflags-y, subdir-asflags-y
320 The two flags listed above are similar to ccflags-y and as-falgs-y.
321 The difference is that the subdir- variants has effect for the kbuild
322 file where tey are present and all subdirectories.
323 Options specified using subdir-* are added to the commandline before
324 the options specified using the non-subdir variants.
325
326 Example:
327 subdir-ccflags-y := -Werror
328
313 CFLAGS_$@, AFLAGS_$@ 329 CFLAGS_$@, AFLAGS_$@
314 330
315 CFLAGS_$@ and AFLAGS_$@ only apply to commands in current 331 CFLAGS_$@ and AFLAGS_$@ only apply to commands in current
@@ -1143,8 +1159,69 @@ When kbuild executes, the following steps are followed (roughly):
1143 The kbuild infrastructure for *lds file are used in several 1159 The kbuild infrastructure for *lds file are used in several
1144 architecture-specific files. 1160 architecture-specific files.
1145 1161
1162=== 7 Kbuild syntax for exported headers
1163
1164The kernel include a set of headers that is exported to userspace.
1165Many headers can be exported as-is but other headers requires a
1166minimal pre-processing before they are ready for user-space.
1167The pre-processing does:
1168- drop kernel specific annotations
1169- drop include of compiler.h
1170- drop all sections that is kernel internat (guarded by ifdef __KERNEL__)
1171
1172Each relevant directory contain a file name "Kbuild" which specify the
1173headers to be exported.
1174See subsequent chapter for the syntax of the Kbuild file.
1175
1176 --- 7.1 header-y
1177
1178 header-y specify header files to be exported.
1179
1180 Example:
1181 #include/linux/Kbuild
1182 header-y += usb/
1183 header-y += aio_abi.h
1184
1185 The convention is to list one file per line and
1186 preferably in alphabetic order.
1187
1188 header-y also specify which subdirectories to visit.
1189 A subdirectory is identified by a trailing '/' which
1190 can be seen in the example above for the usb subdirectory.
1191
1192 Subdirectories are visited before their parent directories.
1193
1194 --- 7.2 objhdr-y
1195
1196 objhdr-y specifies generated files to be exported.
1197 Generated files are special as they need to be looked
1198 up in another directory when doing 'make O=...' builds.
1199
1200 Example:
1201 #include/linux/Kbuild
1202 objhdr-y += version.h
1203
1204 --- 7.3 destination-y
1205
1206 When an architecture have a set of exported headers that needs to be
1207 exported to a different directory destination-y is used.
1208 destination-y specify the destination directory for all exported
1209 headers in the file where it is present.
1210
1211 Example:
1212 #arch/xtensa/platforms/s6105/include/platform/Kbuild
1213 destination-y := include/linux
1214
1215 In the example above all exported headers in the Kbuild file
1216 will be located in the directory "include/linux" when exported.
1217
1218
1219 --- 7.4 unifdef-y (deprecated)
1220
1221 unifdef-y is deprecated. A direct replacement is header-y.
1222
1146 1223
1147=== 7 Kbuild Variables 1224=== 8 Kbuild Variables
1148 1225
1149The top Makefile exports the following variables: 1226The top Makefile exports the following variables:
1150 1227
@@ -1206,7 +1283,7 @@ The top Makefile exports the following variables:
1206 INSTALL_MOD_STRIP will used as the option(s) to the strip command. 1283 INSTALL_MOD_STRIP will used as the option(s) to the strip command.
1207 1284
1208 1285
1209=== 8 Makefile language 1286=== 9 Makefile language
1210 1287
1211The kernel Makefiles are designed to be run with GNU Make. The Makefiles 1288The kernel Makefiles are designed to be run with GNU Make. The Makefiles
1212use only the documented features of GNU Make, but they do use many 1289use only the documented features of GNU Make, but they do use many
@@ -1225,14 +1302,14 @@ time the left-hand side is used.
1225There are some cases where "=" is appropriate. Usually, though, ":=" 1302There are some cases where "=" is appropriate. Usually, though, ":="
1226is the right choice. 1303is the right choice.
1227 1304
1228=== 9 Credits 1305=== 10 Credits
1229 1306
1230Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net> 1307Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net>
1231Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de> 1308Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de>
1232Updates by Sam Ravnborg <sam@ravnborg.org> 1309Updates by Sam Ravnborg <sam@ravnborg.org>
1233Language QA by Jan Engelhardt <jengelh@gmx.de> 1310Language QA by Jan Engelhardt <jengelh@gmx.de>
1234 1311
1235=== 10 TODO 1312=== 11 TODO
1236 1313
1237- Describe how kbuild supports shipped files with _shipped. 1314- Describe how kbuild supports shipped files with _shipped.
1238- Generating offset header files. 1315- Generating offset header files.
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index b1096da953c8..0767cf69c69e 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -275,7 +275,7 @@ following files:
275 275
276 KERNELDIR := /lib/modules/`uname -r`/build 276 KERNELDIR := /lib/modules/`uname -r`/build
277 all:: 277 all::
278 $(MAKE) -C $KERNELDIR M=`pwd` $@ 278 $(MAKE) -C $(KERNELDIR) M=`pwd` $@
279 279
280 # Module specific targets 280 # Module specific targets
281 genbin: 281 genbin:
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 3f4bc840da8b..cab61d842259 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -108,7 +108,7 @@ There are two possible methods of using Kdump.
108 108
1092) Or use the system kernel binary itself as dump-capture kernel and there is 1092) Or use the system kernel binary itself as dump-capture kernel and there is
110 no need to build a separate dump-capture kernel. This is possible 110 no need to build a separate dump-capture kernel. This is possible
111 only with the architecutres which support a relocatable kernel. As 111 only with the architectures which support a relocatable kernel. As
112 of today, i386, x86_64, ppc64 and ia64 architectures support relocatable 112 of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
113 kernel. 113 kernel.
114 114
@@ -222,7 +222,7 @@ Dump-capture kernel config options (Arch Dependent, ia64)
222---------------------------------------------------------- 222----------------------------------------------------------
223 223
224- No specific options are required to create a dump-capture kernel 224- No specific options are required to create a dump-capture kernel
225 for ia64, other than those specified in the arch idependent section 225 for ia64, other than those specified in the arch independent section
226 above. This means that it is possible to use the system kernel 226 above. This means that it is possible to use the system kernel
227 as a dump-capture kernel if desired. 227 as a dump-capture kernel if desired.
228 228
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index 026ec7d57384..4d04572b6549 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -269,7 +269,10 @@ Use the argument mechanism to document members or constants.
269 269
270Inside a struct description, you can use the "private:" and "public:" 270Inside a struct description, you can use the "private:" and "public:"
271comment tags. Structure fields that are inside a "private:" area 271comment tags. Structure fields that are inside a "private:" area
272are not listed in the generated output documentation. 272are not listed in the generated output documentation. The "private:"
273and "public:" tags must begin immediately following a "/*" comment
274marker. They may optionally include comments between the ":" and the
275ending "*/" marker.
273 276
274Example: 277Example:
275 278
@@ -283,7 +286,7 @@ Example:
283struct my_struct { 286struct my_struct {
284 int a; 287 int a;
285 int b; 288 int b;
286/* private: */ 289/* private: internal use only */
287 int c; 290 int c;
288}; 291};
289 292
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 54f21a5c262b..54ebf100e4e0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -17,6 +17,12 @@ are specified on the kernel command line with the module name plus
17 17
18 usbcore.blinkenlights=1 18 usbcore.blinkenlights=1
19 19
20Hyphens (dashes) and underscores are equivalent in parameter names, so
21 log_buf_len=1M print-fatal-signals=1
22can also be entered as
23 log-buf-len=1M print_fatal_signals=1
24
25
20This document may not be entirely up to date and comprehensive. The command 26This document may not be entirely up to date and comprehensive. The command
21"modinfo -p ${modulename}" shows a current list of all parameters of a loadable 27"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
22module. Loadable modules, after being loaded into the running kernel, also 28module. Loadable modules, after being loaded into the running kernel, also
@@ -42,8 +48,10 @@ parameter is applicable:
42 EFI EFI Partitioning (GPT) is enabled 48 EFI EFI Partitioning (GPT) is enabled
43 EIDE EIDE/ATAPI support is enabled. 49 EIDE EIDE/ATAPI support is enabled.
44 FB The frame buffer device is enabled. 50 FB The frame buffer device is enabled.
51 GCOV GCOV profiling is enabled.
45 HW Appropriate hardware is enabled. 52 HW Appropriate hardware is enabled.
46 IA-64 IA-64 architecture is enabled. 53 IA-64 IA-64 architecture is enabled.
54 IMA Integrity measurement architecture is enabled.
47 IOSCHED More than one I/O scheduler is enabled. 55 IOSCHED More than one I/O scheduler is enabled.
48 IP_PNP IP DHCP, BOOTP, or RARP is enabled. 56 IP_PNP IP DHCP, BOOTP, or RARP is enabled.
49 ISAPNP ISA PnP code is enabled. 57 ISAPNP ISA PnP code is enabled.
@@ -132,7 +140,7 @@ and is between 256 and 4096 characters. It is defined in the file
132./include/asm/setup.h as COMMAND_LINE_SIZE. 140./include/asm/setup.h as COMMAND_LINE_SIZE.
133 141
134 142
135 acpi= [HW,ACPI,X86-64,i386] 143 acpi= [HW,ACPI,X86]
136 Advanced Configuration and Power Interface 144 Advanced Configuration and Power Interface
137 Format: { force | off | ht | strict | noirq | rsdt } 145 Format: { force | off | ht | strict | noirq | rsdt }
138 force -- enable ACPI if default was off 146 force -- enable ACPI if default was off
@@ -151,60 +159,6 @@ and is between 256 and 4096 characters. It is defined in the file
151 1,0: use 1st APIC table 159 1,0: use 1st APIC table
152 default: 0 160 default: 0
153 161
154 acpi_sleep= [HW,ACPI] Sleep options
155 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
156 old_ordering, s4_nonvs }
157 See Documentation/power/video.txt for information on
158 s3_bios and s3_mode.
159 s3_beep is for debugging; it makes the PC's speaker beep
160 as soon as the kernel's real-mode entry point is called.
161 s4_nohwsig prevents ACPI hardware signature from being
162 used during resume from hibernation.
163 old_ordering causes the ACPI 1.0 ordering of the _PTS
164 control method, with respect to putting devices into
165 low power states, to be enforced (the ACPI 2.0 ordering
166 of _PTS is used by default).
167 s4_nonvs prevents the kernel from saving/restoring the
168 ACPI NVS memory during hibernation.
169
170 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
171 Format: { level | edge | high | low }
172
173 acpi_irq_balance [HW,ACPI]
174 ACPI will balance active IRQs
175 default in APIC mode
176
177 acpi_irq_nobalance [HW,ACPI]
178 ACPI will not move active IRQs (default)
179 default in PIC mode
180
181 acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for
182 use by PCI
183 Format: <irq>,<irq>...
184
185 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
186 Format: <irq>,<irq>...
187
188 acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
189
190 acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
191 Format: To spoof as Windows 98: ="Microsoft Windows"
192
193 acpi_osi= [HW,ACPI] Modify list of supported OS interface strings
194 acpi_osi="string1" # add string1 -- only one string
195 acpi_osi="!string2" # remove built-in string2
196 acpi_osi= # disable all strings
197
198 acpi_serialize [HW,ACPI] force serialization of AML methods
199
200 acpi_skip_timer_override [HW,ACPI]
201 Recognize and ignore IRQ0/pin2 Interrupt Override.
202 For broken nForce2 BIOS resulting in XT-PIC timer.
203 acpi_use_timer_override [HW,ACPI]
204 Use timer override. For some broken Nvidia NF5 boards
205 that require a timer override, but don't have
206 HPET
207
208 acpi_backlight= [HW,ACPI] 162 acpi_backlight= [HW,ACPI]
209 acpi_backlight=vendor 163 acpi_backlight=vendor
210 acpi_backlight=video 164 acpi_backlight=video
@@ -212,11 +166,6 @@ and is between 256 and 4096 characters. It is defined in the file
212 (e.g. thinkpad_acpi, sony_acpi, etc.) instead 166 (e.g. thinkpad_acpi, sony_acpi, etc.) instead
213 of the ACPI video.ko driver. 167 of the ACPI video.ko driver.
214 168
215 acpi_display_output= [HW,ACPI]
216 acpi_display_output=vendor
217 acpi_display_output=video
218 See above.
219
220 acpi.debug_layer= [HW,ACPI,ACPI_DEBUG] 169 acpi.debug_layer= [HW,ACPI,ACPI_DEBUG]
221 acpi.debug_level= [HW,ACPI,ACPI_DEBUG] 170 acpi.debug_level= [HW,ACPI,ACPI_DEBUG]
222 Format: <int> 171 Format: <int>
@@ -245,6 +194,41 @@ and is between 256 and 4096 characters. It is defined in the file
245 unusable. The "log_buf_len" parameter may be useful 194 unusable. The "log_buf_len" parameter may be useful
246 if you need to capture more output. 195 if you need to capture more output.
247 196
197 acpi_display_output= [HW,ACPI]
198 acpi_display_output=vendor
199 acpi_display_output=video
200 See above.
201
202 acpi_irq_balance [HW,ACPI]
203 ACPI will balance active IRQs
204 default in APIC mode
205
206 acpi_irq_nobalance [HW,ACPI]
207 ACPI will not move active IRQs (default)
208 default in PIC mode
209
210 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
211 Format: <irq>,<irq>...
212
213 acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for
214 use by PCI
215 Format: <irq>,<irq>...
216
217 acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
218
219 acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
220 Format: To spoof as Windows 98: ="Microsoft Windows"
221
222 acpi_osi= [HW,ACPI] Modify list of supported OS interface strings
223 acpi_osi="string1" # add string1 -- only one string
224 acpi_osi="!string2" # remove built-in string2
225 acpi_osi= # disable all strings
226
227 acpi_pm_good [X86]
228 Override the pmtimer bug detection: force the kernel
229 to assume that this machine's pmtimer latches its value
230 and always returns good values.
231
248 acpi.power_nocheck= [HW,ACPI] 232 acpi.power_nocheck= [HW,ACPI]
249 Format: 1/0 enable/disable the check of power state. 233 Format: 1/0 enable/disable the check of power state.
250 On some bogus BIOS the _PSC object/_STA object of 234 On some bogus BIOS the _PSC object/_STA object of
@@ -253,30 +237,57 @@ and is between 256 and 4096 characters. It is defined in the file
253 power state again in power transition. 237 power state again in power transition.
254 1 : disable the power state check 238 1 : disable the power state check
255 239
256 acpi_pm_good [X86-32,X86-64] 240 acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
257 Override the pmtimer bug detection: force the kernel 241 Format: { level | edge | high | low }
258 to assume that this machine's pmtimer latches its value
259 and always returns good values.
260 242
261 agp= [AGP] 243 acpi_serialize [HW,ACPI] force serialization of AML methods
262 { off | try_unsupported }
263 off: disable AGP support
264 try_unsupported: try to drive unsupported chipsets
265 (may crash computer or cause data corruption)
266 244
267 enable_timer_pin_1 [i386,x86-64] 245 acpi_skip_timer_override [HW,ACPI]
268 Enable PIN 1 of APIC timer 246 Recognize and ignore IRQ0/pin2 Interrupt Override.
269 Can be useful to work around chipset bugs 247 For broken nForce2 BIOS resulting in XT-PIC timer.
270 (in particular on some ATI chipsets).
271 The kernel tries to set a reasonable default.
272 248
273 disable_timer_pin_1 [i386,x86-64] 249 acpi_sleep= [HW,ACPI] Sleep options
274 Disable PIN 1 of APIC timer 250 Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
275 Can be useful to work around chipset bugs. 251 old_ordering, s4_nonvs }
252 See Documentation/power/video.txt for information on
253 s3_bios and s3_mode.
254 s3_beep is for debugging; it makes the PC's speaker beep
255 as soon as the kernel's real-mode entry point is called.
256 s4_nohwsig prevents ACPI hardware signature from being
257 used during resume from hibernation.
258 old_ordering causes the ACPI 1.0 ordering of the _PTS
259 control method, with respect to putting devices into
260 low power states, to be enforced (the ACPI 2.0 ordering
261 of _PTS is used by default).
262 s4_nonvs prevents the kernel from saving/restoring the
263 ACPI NVS memory during hibernation.
264
265 acpi_use_timer_override [HW,ACPI]
266 Use timer override. For some broken Nvidia NF5 boards
267 that require a timer override, but don't have HPET
268
269 acpi_enforce_resources= [ACPI]
270 { strict | lax | no }
271 Check for resource conflicts between native drivers
272 and ACPI OperationRegions (SystemIO and SystemMemory
273 only). IO ports and memory declared in ACPI might be
274 used by the ACPI subsystem in arbitrary AML code and
275 can interfere with legacy drivers.
276 strict (default): access to resources claimed by ACPI
277 is denied; legacy drivers trying to access reserved
278 resources will fail to bind to device using them.
279 lax: access to resources claimed by ACPI is allowed;
280 legacy drivers trying to access reserved resources
281 will bind successfully but a warning message is logged.
282 no: ACPI OperationRegions are not marked as reserved,
283 no further checks are performed.
276 284
277 ad1848= [HW,OSS] 285 ad1848= [HW,OSS]
278 Format: <io>,<irq>,<dma>,<dma2>,<type> 286 Format: <io>,<irq>,<dma>,<dma2>,<type>
279 287
288 add_efi_memmap [EFI; X86] Include EFI memory map in
289 kernel's map of available physical RAM.
290
280 advansys= [HW,SCSI] 291 advansys= [HW,SCSI]
281 See header of drivers/scsi/advansys.c. 292 See header of drivers/scsi/advansys.c.
282 293
@@ -287,6 +298,12 @@ and is between 256 and 4096 characters. It is defined in the file
287 Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq> 298 Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq>
288 See also header of sound/oss/aedsp16.c. 299 See also header of sound/oss/aedsp16.c.
289 300
301 agp= [AGP]
302 { off | try_unsupported }
303 off: disable AGP support
304 try_unsupported: try to drive unsupported chipsets
305 (may crash computer or cause data corruption)
306
290 aha152x= [HW,SCSI] 307 aha152x= [HW,SCSI]
291 See Documentation/scsi/aha152x.txt. 308 See Documentation/scsi/aha152x.txt.
292 309
@@ -312,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
312 flushed before they will be reused, which 329 flushed before they will be reused, which
313 is a lot of faster 330 is a lot of faster
314 331
315 amd_iommu_size= [HW,X86-64]
316 Define the size of the aperture for the AMD IOMMU
317 driver. Possible values are:
318 '32M', '64M' (default), '128M', '256M', '512M', '1G'
319
320 amijoy.map= [HW,JOY] Amiga joystick support 332 amijoy.map= [HW,JOY] Amiga joystick support
321 Map of devices attached to JOY0DAT and JOY1DAT 333 Map of devices attached to JOY0DAT and JOY1DAT
322 Format: <a>,<b> 334 Format: <a>,<b>
@@ -334,7 +346,7 @@ and is between 256 and 4096 characters. It is defined in the file
334 not play well with APC CPU idle - disable it if you have 346 not play well with APC CPU idle - disable it if you have
335 APC and your system crashes randomly. 347 APC and your system crashes randomly.
336 348
337 apic= [APIC,i386] Advanced Programmable Interrupt Controller 349 apic= [APIC,X86-32] Advanced Programmable Interrupt Controller
338 Change the output verbosity whilst booting 350 Change the output verbosity whilst booting
339 Format: { quiet (default) | verbose | debug } 351 Format: { quiet (default) | verbose | debug }
340 Change the amount of debugging information output 352 Change the amount of debugging information output
@@ -414,12 +426,6 @@ and is between 256 and 4096 characters. It is defined in the file
414 possible to determine what the correct size should be. 426 possible to determine what the correct size should be.
415 This option provides an override for these situations. 427 This option provides an override for these situations.
416 428
417 security= [SECURITY] Choose a security module to enable at boot.
418 If this boot parameter is not specified, only the first
419 security module asking for security registration will be
420 loaded. An invalid security module name will be treated
421 as if no module has been chosen.
422
423 capability.disable= 429 capability.disable=
424 [SECURITY] Disable capabilities. This would normally 430 [SECURITY] Disable capabilities. This would normally
425 be used only if an alternative security model is to be 431 be used only if an alternative security model is to be
@@ -486,17 +492,18 @@ and is between 256 and 4096 characters. It is defined in the file
486 Also note the kernel might malfunction if you disable 492 Also note the kernel might malfunction if you disable
487 some critical bits. 493 some critical bits.
488 494
489 code_bytes [IA32/X86_64] How many bytes of object code to print 495 cmo_free_hint= [PPC] Format: { yes | no }
496 Specify whether pages are marked as being inactive
497 when they are freed. This is used in CMO environments
498 to determine OS memory pressure for page stealing by
499 a hypervisor.
500 Default: yes
501
502 code_bytes [X86] How many bytes of object code to print
490 in an oops report. 503 in an oops report.
491 Range: 0 - 8192 504 Range: 0 - 8192
492 Default: 64 505 Default: 64
493 506
494 hpet= [X86-32,HPET] option to control HPET usage
495 Format: { enable (default) | disable | force }
496 disable: disable HPET and use PIT instead
497 force: allow force enabled of undocumented chips (ICH4,
498 VIA, nVidia)
499
500 com20020= [HW,NET] ARCnet - COM20020 chipset 507 com20020= [HW,NET] ARCnet - COM20020 chipset
501 Format: 508 Format:
502 <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]] 509 <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
@@ -540,22 +547,9 @@ and is between 256 and 4096 characters. It is defined in the file
540 console=brl,ttyS0 547 console=brl,ttyS0
541 For now, only VisioBraille is supported. 548 For now, only VisioBraille is supported.
542 549
543 earlycon= [KNL] Output early console device and options. 550 consoleblank= [KNL] The console blank (screen saver) timeout in
544 uart[8250],io,<addr>[,options] 551 seconds. Defaults to 10*60 = 10mins. A value of 0
545 uart[8250],mmio,<addr>[,options] 552 disables the blank timer.
546 Start an early, polled-mode console on the 8250/16550
547 UART at the specified I/O port or MMIO address.
548 The options are the same as for ttyS, above.
549
550 no_console_suspend
551 [HW] Never suspend the console
552 Disable suspending of consoles during suspend and
553 hibernate operations. Once disabled, debugging
554 messages can reach various consoles while the rest
555 of the system is being put to sleep (ie, while
556 debugging driver suspend/resume hooks). This may
557 not work reliably with all consoles, but is known
558 to work with serial and VGA consoles.
559 553
560 coredump_filter= 554 coredump_filter=
561 [KNL] Change the default value for 555 [KNL] Change the default value for
@@ -604,36 +598,22 @@ and is between 256 and 4096 characters. It is defined in the file
604 598
605 debug_objects [KNL] Enable object debugging 599 debug_objects [KNL] Enable object debugging
606 600
601 no_debug_objects
602 [KNL] Disable object debugging
603
607 debugpat [X86] Enable PAT debugging 604 debugpat [X86] Enable PAT debugging
608 605
609 decnet.addr= [HW,NET] 606 decnet.addr= [HW,NET]
610 Format: <area>[,<node>] 607 Format: <area>[,<node>]
611 See also Documentation/networking/decnet.txt. 608 See also Documentation/networking/decnet.txt.
612 609
613 vt.default_blu= [VT] 610 default_hugepagesz=
614 Format: <blue0>,<blue1>,<blue2>,...,<blue15> 611 [same as hugepagesz=] The size of the default
615 Change the default blue palette of the console. 612 HugeTLB page size. This is the size represented by
616 This is a 16-member array composed of values 613 the legacy /proc/ hugepages APIs, used for SHM, and
617 ranging from 0-255. 614 default size when mounting hugetlbfs filesystems.
618 615 Defaults to the default architecture's huge page size
619 vt.default_grn= [VT] 616 if not specified.
620 Format: <green0>,<green1>,<green2>,...,<green15>
621 Change the default green palette of the console.
622 This is a 16-member array composed of values
623 ranging from 0-255.
624
625 vt.default_red= [VT]
626 Format: <red0>,<red1>,<red2>,...,<red15>
627 Change the default red palette of the console.
628 This is a 16-member array composed of values
629 ranging from 0-255.
630
631 vt.default_utf8=
632 [VT]
633 Format=<0|1>
634 Set system-wide default UTF-8 mode for all tty's.
635 Default is 1, i.e. UTF-8 mode is enabled for all
636 newly opened terminals.
637 617
638 dhash_entries= [KNL] 618 dhash_entries= [KNL]
639 Set number of hash buckets for dentry cache. 619 Set number of hash buckets for dentry cache.
@@ -646,27 +626,9 @@ and is between 256 and 4096 characters. It is defined in the file
646 Documentation/serial/digiepca.txt. 626 Documentation/serial/digiepca.txt.
647 627
648 disable_mtrr_cleanup [X86] 628 disable_mtrr_cleanup [X86]
649 enable_mtrr_cleanup [X86]
650 The kernel tries to adjust MTRR layout from continuous 629 The kernel tries to adjust MTRR layout from continuous
651 to discrete, to make X server driver able to add WB 630 to discrete, to make X server driver able to add WB
652 entry later. This parameter enables/disables that. 631 entry later. This parameter disables that.
653
654 mtrr_chunk_size=nn[KMG] [X86]
655 used for mtrr cleanup. It is largest continous chunk
656 that could hold holes aka. UC entries.
657
658 mtrr_gran_size=nn[KMG] [X86]
659 Used for mtrr cleanup. It is granularity of mtrr block.
660 Default is 1.
661 Large value could prevent small alignment from
662 using up MTRRs.
663
664 mtrr_spare_reg_nr=n [X86]
665 Format: <integer>
666 Range: 0,7 : spare reg number
667 Default : 1
668 Used for mtrr cleanup. It is spare mtrr entries number.
669 Set to 2 or more if your graphical card needs more.
670 632
671 disable_mtrr_trim [X86, Intel and AMD only] 633 disable_mtrr_trim [X86, Intel and AMD only]
672 By default the kernel will trim any uncacheable 634 By default the kernel will trim any uncacheable
@@ -674,13 +636,46 @@ and is between 256 and 4096 characters. It is defined in the file
674 MTRR settings. This parameter disables that behavior, 636 MTRR settings. This parameter disables that behavior,
675 possibly causing your machine to run very slowly. 637 possibly causing your machine to run very slowly.
676 638
639 disable_timer_pin_1 [X86]
640 Disable PIN 1 of APIC timer
641 Can be useful to work around chipset bugs.
642
677 dmasound= [HW,OSS] Sound subsystem buffers 643 dmasound= [HW,OSS] Sound subsystem buffers
678 644
645 dma_debug=off If the kernel is compiled with DMA_API_DEBUG support,
646 this option disables the debugging code at boot.
647
648 dma_debug_entries=<number>
649 This option allows to tune the number of preallocated
650 entries for DMA-API debugging code. One entry is
651 required per DMA-API allocation. Use this if the
652 DMA-API debugging code disables itself because the
653 architectural default is too low.
654
655 dma_debug_driver=<driver_name>
656 With this option the DMA-API debugging driver
657 filter feature can be enabled at boot time. Just
658 pass the driver to filter for as the parameter.
659 The filter can be disabled or changed to another
660 driver later using sysfs.
661
679 dscc4.setup= [NET] 662 dscc4.setup= [NET]
680 663
681 dtc3181e= [HW,SCSI] 664 dtc3181e= [HW,SCSI]
682 665
683 earlyprintk= [X86-32,X86-64,SH,BLACKFIN] 666 dynamic_printk Enables pr_debug()/dev_dbg() calls if
667 CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
668 These can also be switched on/off via
669 <debugfs>/dynamic_printk/modules
670
671 earlycon= [KNL] Output early console device and options.
672 uart[8250],io,<addr>[,options]
673 uart[8250],mmio,<addr>[,options]
674 Start an early, polled-mode console on the 8250/16550
675 UART at the specified I/O port or MMIO address.
676 The options are the same as for ttyS, above.
677
678 earlyprintk= [X86,SH,BLACKFIN]
684 earlyprintk=vga 679 earlyprintk=vga
685 earlyprintk=serial[,ttySn[,baudrate]] 680 earlyprintk=serial[,ttySn[,baudrate]]
686 earlyprintk=dbgp 681 earlyprintk=dbgp
@@ -715,12 +710,23 @@ and is between 256 and 4096 characters. It is defined in the file
715 See Documentation/block/as-iosched.txt and 710 See Documentation/block/as-iosched.txt and
716 Documentation/block/deadline-iosched.txt for details. 711 Documentation/block/deadline-iosched.txt for details.
717 712
718 elfcorehdr= [IA64,PPC,SH,X86-32,X86_64] 713 elfcorehdr= [IA64,PPC,SH,X86]
719 Specifies physical address of start of kernel core 714 Specifies physical address of start of kernel core
720 image elf header. Generally kexec loader will 715 image elf header. Generally kexec loader will
721 pass this option to capture kernel. 716 pass this option to capture kernel.
722 See Documentation/kdump/kdump.txt for details. 717 See Documentation/kdump/kdump.txt for details.
723 718
719 enable_mtrr_cleanup [X86]
720 The kernel tries to adjust MTRR layout from continuous
721 to discrete, to make X server driver able to add WB
722 entry later. This parameter enables that.
723
724 enable_timer_pin_1 [X86]
725 Enable PIN 1 of APIC timer
726 Can be useful to work around chipset bugs
727 (in particular on some ATI chipsets).
728 The kernel tries to set a reasonable default.
729
724 enforcing [SELINUX] Set initial enforcing status. 730 enforcing [SELINUX] Set initial enforcing status.
725 Format: {"0" | "1"} 731 Format: {"0" | "1"}
726 See security/selinux/Kconfig help text. 732 See security/selinux/Kconfig help text.
@@ -759,12 +765,25 @@ and is between 256 and 4096 characters. It is defined in the file
759 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 765 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
760 766
761 ftrace=[tracer] 767 ftrace=[tracer]
762 [ftrace] will set and start the specified tracer 768 [FTRACE] will set and start the specified tracer
763 as early as possible in order to facilitate early 769 as early as possible in order to facilitate early
764 boot debugging. 770 boot debugging.
765 771
766 ftrace_dump_on_oops 772 ftrace_dump_on_oops
767 [ftrace] will dump the trace buffers on oops. 773 [FTRACE] will dump the trace buffers on oops.
774
775 ftrace_filter=[function-list]
776 [FTRACE] Limit the functions traced by the function
777 tracer at boot up. function-list is a comma separated
778 list of functions. This list can be changed at run
779 time by the set_ftrace_filter file in the debugfs
780 tracing directory.
781
782 ftrace_notrace=[function-list]
783 [FTRACE] Do not trace the functions specified in
784 function-list. This list can be changed at run time
785 by the set_ftrace_notrace file in the debugfs
786 tracing directory.
768 787
769 gamecon.map[2|3]= 788 gamecon.map[2|3]=
770 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 789 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -778,6 +797,12 @@ and is between 256 and 4096 characters. It is defined in the file
778 Format: off | on 797 Format: off | on
779 default: on 798 default: on
780 799
800 gcov_persist= [GCOV] When non-zero (default), profiling data for
801 kernel modules is saved and remains accessible via
802 debugfs, even when the module is unloaded/reloaded.
803 When zero, profiling data is discarded and associated
804 debugfs files are removed at module unload time.
805
781 gdth= [HW,SCSI] 806 gdth= [HW,SCSI]
782 See header of drivers/scsi/gdth.c. 807 See header of drivers/scsi/gdth.c.
783 808
@@ -788,7 +813,7 @@ and is between 256 and 4096 characters. It is defined in the file
788 813
789 hashdist= [KNL,NUMA] Large hashes allocated during boot 814 hashdist= [KNL,NUMA] Large hashes allocated during boot
790 are distributed across NUMA nodes. Defaults on 815 are distributed across NUMA nodes. Defaults on
791 for IA-64, off otherwise. 816 for 64bit NUMA, off otherwise.
792 Format: 0 | 1 (for off | on) 817 Format: 0 | 1 (for off | on)
793 818
794 hcl= [IA-64] SGI's Hardware Graph compatibility layer 819 hcl= [IA-64] SGI's Hardware Graph compatibility layer
@@ -808,6 +833,16 @@ and is between 256 and 4096 characters. It is defined in the file
808 hisax= [HW,ISDN] 833 hisax= [HW,ISDN]
809 See Documentation/isdn/README.HiSax. 834 See Documentation/isdn/README.HiSax.
810 835
836 hlt [BUGS=ARM,SH]
837
838 hpet= [X86-32,HPET] option to control HPET usage
839 Format: { enable (default) | disable | force |
840 verbose }
841 disable: disable HPET and use PIT instead
842 force: allow force enabled of undocumented chips (ICH4,
843 VIA, nVidia)
844 verbose: show contents of HPET registers during setup
845
811 hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. 846 hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
812 hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. 847 hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
813 On x86-64 and powerpc, this option can be specified 848 On x86-64 and powerpc, this option can be specified
@@ -817,18 +852,18 @@ and is between 256 and 4096 characters. It is defined in the file
817 (when the CPU supports the "pdpe1gb" cpuinfo flag) 852 (when the CPU supports the "pdpe1gb" cpuinfo flag)
818 Note that 1GB pages can only be allocated at boot time 853 Note that 1GB pages can only be allocated at boot time
819 using hugepages= and not freed afterwards. 854 using hugepages= and not freed afterwards.
820 default_hugepagesz=
821 [same as hugepagesz=] The size of the default
822 HugeTLB page size. This is the size represented by
823 the legacy /proc/ hugepages APIs, used for SHM, and
824 default size when mounting hugetlbfs filesystems.
825 Defaults to the default architecture's huge page size
826 if not specified.
827
828 hlt [BUGS=ARM,SH]
829 855
830 hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC) 856 hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
831 terminal devices. Valid values: 0..8 857 terminal devices. Valid values: 0..8
858 hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
859 If specified, z/VM IUCV HVC accepts connections
860 from listed z/VM user IDs only.
861
862 i2c_bus= [HW] Override the default board specific I2C bus speed
863 or register an additional I2C bus that is not
864 registered from board initialization code.
865 Format:
866 <bus_id>,<clkrate>
832 867
833 i8042.debug [HW] Toggle i8042 debug mode 868 i8042.debug [HW] Toggle i8042 debug mode
834 i8042.direct [HW] Put keyboard port into non-translated mode 869 i8042.direct [HW] Put keyboard port into non-translated mode
@@ -870,12 +905,12 @@ and is between 256 and 4096 characters. It is defined in the file
870 905
871 ide-core.nodma= [HW] (E)IDE subsystem 906 ide-core.nodma= [HW] (E)IDE subsystem
872 Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc 907 Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
873 .vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom 908 .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
874 .chs .ignore_cable are additional options 909 .cdrom .chs .ignore_cable are additional options
875 See Documentation/ide/ide.txt. 910 See Documentation/ide/ide.txt.
876 911
877 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed 912 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
878 See Documentation/ide/ide.txt. 913 Claim all unknown PCI IDE storage controllers.
879 914
880 idle= [X86] 915 idle= [X86]
881 Format: idle=poll, idle=mwait, idle=halt, idle=nomwait 916 Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
@@ -892,9 +927,6 @@ and is between 256 and 4096 characters. It is defined in the file
892 In such case C2/C3 won't be used again. 927 In such case C2/C3 won't be used again.
893 idle=nomwait: Disable mwait for CPU C-states 928 idle=nomwait: Disable mwait for CPU C-states
894 929
895 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
896 Claim all unknown PCI IDE storage controllers.
897
898 ignore_loglevel [KNL] 930 ignore_loglevel [KNL]
899 Ignore loglevel setting - this will print /all/ 931 Ignore loglevel setting - this will print /all/
900 kernel messages to the console. Useful for debugging. 932 kernel messages to the console. Useful for debugging.
@@ -902,6 +934,21 @@ and is between 256 and 4096 characters. It is defined in the file
902 ihash_entries= [KNL] 934 ihash_entries= [KNL]
903 Set number of hash buckets for inode cache. 935 Set number of hash buckets for inode cache.
904 936
937 ima_audit= [IMA]
938 Format: { "0" | "1" }
939 0 -- integrity auditing messages. (Default)
940 1 -- enable informational integrity auditing messages.
941
942 ima_hash= [IMA]
943 Formt: { "sha1" | "md5" }
944 default: "sha1"
945
946 ima_tcb [IMA]
947 Load a policy which meets the needs of the Trusted
948 Computing Base. This means IMA will measure all
949 programs exec'd, files mmap'd for exec, and all files
950 opened for read by uid=0.
951
905 in2000= [HW,SCSI] 952 in2000= [HW,SCSI]
906 See header of drivers/scsi/in2000.c. 953 See header of drivers/scsi/in2000.c.
907 954
@@ -919,25 +966,6 @@ and is between 256 and 4096 characters. It is defined in the file
919 inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver 966 inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
920 Format: <irq> 967 Format: <irq>
921 968
922 inttest= [IA64]
923
924 iomem= Disable strict checking of access to MMIO memory
925 strict regions from userspace.
926 relaxed
927
928 iommu= [x86]
929 off
930 force
931 noforce
932 biomerge
933 panic
934 nopanic
935 merge
936 nomerge
937 forcesac
938 soft
939
940
941 intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option 969 intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
942 on 970 on
943 Enable intel iommu driver. 971 Enable intel iommu driver.
@@ -961,7 +989,29 @@ and is between 256 and 4096 characters. It is defined in the file
961 result in a hardware IOTLB flush operation as opposed 989 result in a hardware IOTLB flush operation as opposed
962 to batching them for performance. 990 to batching them for performance.
963 991
964 io_delay= [X86-32,X86-64] I/O delay method 992 inttest= [IA64]
993
994 iomem= Disable strict checking of access to MMIO memory
995 strict regions from userspace.
996 relaxed
997
998 iommu= [x86]
999 off
1000 force
1001 noforce
1002 biomerge
1003 panic
1004 nopanic
1005 merge
1006 nomerge
1007 forcesac
1008 soft
1009
1010 io7= [HW] IO7 for Marvel based alpha systems
1011 See comment before marvel_specify_io7 in
1012 arch/alpha/kernel/core_marvel.c.
1013
1014 io_delay= [X86] I/O delay method
965 0x80 1015 0x80
966 Standard port 0x80 based delay 1016 Standard port 0x80 based delay
967 0xed 1017 0xed
@@ -971,10 +1021,6 @@ and is between 256 and 4096 characters. It is defined in the file
971 none 1021 none
972 No delay 1022 No delay
973 1023
974 io7= [HW] IO7 for Marvel based alpha systems
975 See comment before marvel_specify_io7 in
976 arch/alpha/kernel/core_marvel.c.
977
978 ip= [IP_PNP] 1024 ip= [IP_PNP]
979 See Documentation/filesystems/nfsroot.txt. 1025 See Documentation/filesystems/nfsroot.txt.
980 1026
@@ -985,12 +1031,6 @@ and is between 256 and 4096 characters. It is defined in the file
985 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller 1031 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
986 See header of drivers/scsi/ips.c. 1032 See header of drivers/scsi/ips.c.
987 1033
988 ports= [IP_VS_FTP] IPVS ftp helper module
989 Default is 21.
990 Up to 8 (IP_VS_APP_MAX_PORTS) ports
991 may be specified.
992 Format: <port>,<port>....
993
994 irqfixup [HW] 1034 irqfixup [HW]
995 When an interrupt is not handled search all handlers 1035 When an interrupt is not handled search all handlers
996 for it. Intended to get systems with badly broken 1036 for it. Intended to get systems with badly broken
@@ -1031,7 +1071,9 @@ and is between 256 and 4096 characters. It is defined in the file
1031 js= [HW,JOY] Analog joystick 1071 js= [HW,JOY] Analog joystick
1032 See Documentation/input/joystick.txt. 1072 See Documentation/input/joystick.txt.
1033 1073
1034 kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter 1074 keepinitrd [HW,ARM]
1075
1076 kernelcore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter
1035 specifies the amount of memory usable by the kernel 1077 specifies the amount of memory usable by the kernel
1036 for non-movable allocations. The requested amount is 1078 for non-movable allocations. The requested amount is
1037 spread evenly throughout all nodes in the system. The 1079 spread evenly throughout all nodes in the system. The
@@ -1047,30 +1089,22 @@ and is between 256 and 4096 characters. It is defined in the file
1047 use the HighMem zone if it exists, and the Normal 1089 use the HighMem zone if it exists, and the Normal
1048 zone if it does not. 1090 zone if it does not.
1049 1091
1050 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
1051 is similar to kernelcore except it specifies the
1052 amount of memory used for migratable allocations.
1053 If both kernelcore and movablecore is specified,
1054 then kernelcore will be at *least* the specified
1055 value but may be more. If movablecore on its own
1056 is specified, the administrator must be careful
1057 that the amount of memory usable for all allocations
1058 is not too small.
1059
1060 keepinitrd [HW,ARM]
1061
1062 kstack=N [X86-32,X86-64] Print N words from the kernel stack
1063 in oops dumps.
1064
1065 kgdboc= [HW] kgdb over consoles. 1092 kgdboc= [HW] kgdb over consoles.
1066 Requires a tty driver that supports console polling. 1093 Requires a tty driver that supports console polling.
1067 (only serial suported for now) 1094 (only serial supported for now)
1068 Format: <serial_device>[,baud] 1095 Format: <serial_device>[,baud]
1069 1096
1070 kmac= [MIPS] korina ethernet MAC address. 1097 kmac= [MIPS] korina ethernet MAC address.
1071 Configure the RouterBoard 532 series on-chip 1098 Configure the RouterBoard 532 series on-chip
1072 Ethernet adapter MAC address. 1099 Ethernet adapter MAC address.
1073 1100
1101 kmemleak= [KNL] Boot-time kmemleak enable/disable
1102 Valid arguments: on, off
1103 Default: on
1104
1105 kstack=N [X86] Print N words from the kernel stack
1106 in oops dumps.
1107
1074 l2cr= [PPC] 1108 l2cr= [PPC]
1075 1109
1076 l3cr= [PPC] 1110 l3cr= [PPC]
@@ -1078,7 +1112,7 @@ and is between 256 and 4096 characters. It is defined in the file
1078 lapic [X86-32,APIC] Enable the local APIC even if BIOS 1112 lapic [X86-32,APIC] Enable the local APIC even if BIOS
1079 disabled it. 1113 disabled it.
1080 1114
1081 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer 1115 lapic_timer_c2_ok [X86,APIC] trust the local apic timer
1082 in C2 power state. 1116 in C2 power state.
1083 1117
1084 libata.dma= [LIBATA] DMA control 1118 libata.dma= [LIBATA] DMA control
@@ -1216,9 +1250,8 @@ and is between 256 and 4096 characters. It is defined in the file
1216 (machvec) in a generic kernel. 1250 (machvec) in a generic kernel.
1217 Example: machvec=hpzx1_swiotlb 1251 Example: machvec=hpzx1_swiotlb
1218 1252
1219 max_loop= [LOOP] Maximum number of loopback devices that can 1253 max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
1220 be mounted 1254 than or equal to this physical address is ignored.
1221 Format: <1-256>
1222 1255
1223 maxcpus= [SMP] Maximum number of processors that an SMP kernel 1256 maxcpus= [SMP] Maximum number of processors that an SMP kernel
1224 should make use of. maxcpus=n : n >= 0 limits the 1257 should make use of. maxcpus=n : n >= 0 limits the
@@ -1226,8 +1259,9 @@ and is between 256 and 4096 characters. It is defined in the file
1226 it is equivalent to "nosmp", which also disables 1259 it is equivalent to "nosmp", which also disables
1227 the IO APIC. 1260 the IO APIC.
1228 1261
1229 max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater than 1262 max_loop= [LOOP] Maximum number of loopback devices that can
1230 or equal to this physical address is ignored. 1263 be mounted
1264 Format: <1-256>
1231 1265
1232 max_luns= [SCSI] Maximum number of LUNs to probe. 1266 max_luns= [SCSI] Maximum number of LUNs to probe.
1233 Should be between 1 and 2^32-1. 1267 Should be between 1 and 2^32-1.
@@ -1263,7 +1297,7 @@ and is between 256 and 4096 characters. It is defined in the file
1263 [KNL,SH] Allow user to override the default size for 1297 [KNL,SH] Allow user to override the default size for
1264 per-device physically contiguous DMA buffers. 1298 per-device physically contiguous DMA buffers.
1265 1299
1266 memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact 1300 memmap=exactmap [KNL,X86] Enable setting of an exact
1267 E820 memory map, as specified by the user. 1301 E820 memory map, as specified by the user.
1268 Such memmap=exactmap lines can be constructed based on 1302 Such memmap=exactmap lines can be constructed based on
1269 BIOS output or other requirements. See the memmap=nn@ss 1303 BIOS output or other requirements. See the memmap=nn@ss
@@ -1310,8 +1344,13 @@ and is between 256 and 4096 characters. It is defined in the file
1310 1344
1311 memtest= [KNL,X86] Enable memtest 1345 memtest= [KNL,X86] Enable memtest
1312 Format: <integer> 1346 Format: <integer>
1313 range: 0,4 : pattern number
1314 default : 0 <disable> 1347 default : 0 <disable>
1348 Specifies the number of memtest passes to be
1349 performed. Each pass selects another test
1350 pattern from a given set of patterns. Memtest
1351 fills the memory with this pattern, validates
1352 memory contents and reserves bad memory
1353 regions that are detected.
1315 1354
1316 meye.*= [HW] Set MotionEye Camera parameters 1355 meye.*= [HW] Set MotionEye Camera parameters
1317 See Documentation/video4linux/meye.txt. 1356 See Documentation/video4linux/meye.txt.
@@ -1330,6 +1369,27 @@ and is between 256 and 4096 characters. It is defined in the file
1330 min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this 1369 min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this
1331 physical address is ignored. 1370 physical address is ignored.
1332 1371
1372 mini2440= [ARM,HW,KNL]
1373 Format:[0..2][b][c][t]
1374 Default: "0tb"
1375 MINI2440 configuration specification:
1376 0 - The attached screen is the 3.5" TFT
1377 1 - The attached screen is the 7" TFT
1378 2 - The VGA Shield is attached (1024x768)
1379 Leaving out the screen size parameter will not load
1380 the TFT driver, and the framebuffer will be left
1381 unconfigured.
1382 b - Enable backlight. The TFT backlight pin will be
1383 linked to the kernel VESA blanking code and a GPIO
1384 LED. This parameter is not necessary when using the
1385 VGA shield.
1386 c - Enable the s3c camera interface.
1387 t - Reserved for enabling touchscreen support. The
1388 touchscreen support is not enabled in the mainstream
1389 kernel as of 2.6.30, a preliminary port can be found
1390 in the "bleeding edge" mini2440 support kernel at
1391 http://repo.or.cz/w/linux-2.6/mini2440.git
1392
1333 mminit_loglevel= 1393 mminit_loglevel=
1334 [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this 1394 [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
1335 parameter allows control of the logging verbosity for 1395 parameter allows control of the logging verbosity for
@@ -1349,6 +1409,16 @@ and is between 256 and 4096 characters. It is defined in the file
1349 mousedev.yres= [MOUSE] Vertical screen resolution, used for devices 1409 mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
1350 reporting absolute coordinates, such as tablets 1410 reporting absolute coordinates, such as tablets
1351 1411
1412 movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter
1413 is similar to kernelcore except it specifies the
1414 amount of memory used for migratable allocations.
1415 If both kernelcore and movablecore is specified,
1416 then kernelcore will be at *least* the specified
1417 value but may be more. If movablecore on its own
1418 is specified, the administrator must be careful
1419 that the amount of memory usable for all allocations
1420 is not too small.
1421
1352 mpu401= [HW,OSS] 1422 mpu401= [HW,OSS]
1353 Format: <io>,<irq> 1423 Format: <io>,<irq>
1354 1424
@@ -1361,6 +1431,16 @@ and is between 256 and 4096 characters. It is defined in the file
1361 mtdparts= [MTD] 1431 mtdparts= [MTD]
1362 See drivers/mtd/cmdlinepart.c. 1432 See drivers/mtd/cmdlinepart.c.
1363 1433
1434 onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration
1435
1436 Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
1437
1438 boundary - index of last SLC block on Flex-OneNAND.
1439 The remaining blocks are configured as MLC blocks.
1440 lock - Configure if Flex-OneNAND boundary should be locked.
1441 Once locked, the boundary cannot be changed.
1442 1 indicates lock status, 0 indicates unlock status.
1443
1364 mtdset= [ARM] 1444 mtdset= [ARM]
1365 ARM/S3C2412 JIVE boot control 1445 ARM/S3C2412 JIVE boot control
1366 1446
@@ -1370,6 +1450,23 @@ and is between 256 and 4096 characters. It is defined in the file
1370 [HW] Make the MicroTouch USB driver use raw coordinates 1450 [HW] Make the MicroTouch USB driver use raw coordinates
1371 ('y', default) or cooked coordinates ('n') 1451 ('y', default) or cooked coordinates ('n')
1372 1452
1453 mtrr_chunk_size=nn[KMG] [X86]
1454 used for mtrr cleanup. It is largest continuous chunk
1455 that could hold holes aka. UC entries.
1456
1457 mtrr_gran_size=nn[KMG] [X86]
1458 Used for mtrr cleanup. It is granularity of mtrr block.
1459 Default is 1.
1460 Large value could prevent small alignment from
1461 using up MTRRs.
1462
1463 mtrr_spare_reg_nr=n [X86]
1464 Format: <integer>
1465 Range: 0,7 : spare reg number
1466 Default : 1
1467 Used for mtrr cleanup. It is spare mtrr entries number.
1468 Set to 2 or more if your graphical card needs more.
1469
1373 n2= [NET] SDL Inc. RISCom/N2 synchronous serial card 1470 n2= [NET] SDL Inc. RISCom/N2 synchronous serial card
1374 1471
1375 NCR_D700= [HW,SCSI] 1472 NCR_D700= [HW,SCSI]
@@ -1424,17 +1521,19 @@ and is between 256 and 4096 characters. It is defined in the file
1424 when a NMI is triggered. 1521 when a NMI is triggered.
1425 Format: [state][,regs][,debounce][,die] 1522 Format: [state][,regs][,debounce][,die]
1426 1523
1427 nmi_watchdog= [KNL,BUGS=X86-32,X86-64] Debugging features for SMP kernels 1524 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
1428 Format: [panic,][num] 1525 Format: [panic,][num]
1429 Valid num: 0,1,2 1526 Valid num: 0,1,2
1430 0 - turn nmi_watchdog off 1527 0 - turn nmi_watchdog off
1431 1 - use the IO-APIC timer for the NMI watchdog 1528 1 - use the IO-APIC timer for the NMI watchdog
1432 2 - use the local APIC for the NMI watchdog using 1529 2 - use the local APIC for the NMI watchdog using
1433 a performance counter. Note: This will use one performance 1530 a performance counter. Note: This will use one
1434 counter and the local APIC's performance vector. 1531 performance counter and the local APIC's performance
1435 When panic is specified panic when an NMI watchdog timeout occurs. 1532 vector.
1436 This is useful when you use a panic=... timeout and need the box 1533 When panic is specified, panic when an NMI watchdog
1437 quickly up again. 1534 timeout occurs.
1535 This is useful when you use a panic=... timeout and
1536 need the box quickly up again.
1438 Instead of 1 and 2 it is possible to use the following 1537 Instead of 1 and 2 it is possible to use the following
1439 symbolic names: lapic and ioapic 1538 symbolic names: lapic and ioapic
1440 Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic 1539 Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
@@ -1443,6 +1542,16 @@ and is between 256 and 4096 characters. It is defined in the file
1443 emulation library even if a 387 maths coprocessor 1542 emulation library even if a 387 maths coprocessor
1444 is present. 1543 is present.
1445 1544
1545 no_console_suspend
1546 [HW] Never suspend the console
1547 Disable suspending of consoles during suspend and
1548 hibernate operations. Once disabled, debugging
1549 messages can reach various consoles while the rest
1550 of the system is being put to sleep (ie, while
1551 debugging driver suspend/resume hooks). This may
1552 not work reliably with all consoles, but is known
1553 to work with serial and VGA consoles.
1554
1446 noaliencache [MM, NUMA, SLAB] Disables the allocation of alien 1555 noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
1447 caches in the slab allocator. Saves per-node memory, 1556 caches in the slab allocator. Saves per-node memory,
1448 but will impact performance. 1557 but will impact performance.
@@ -1457,17 +1566,19 @@ and is between 256 and 4096 characters. It is defined in the file
1457 1566
1458 nocache [ARM] 1567 nocache [ARM]
1459 1568
1569 noclflush [BUGS=X86] Don't use the CLFLUSH instruction
1570
1460 nodelayacct [KNL] Disable per-task delay accounting 1571 nodelayacct [KNL] Disable per-task delay accounting
1461 1572
1462 nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. 1573 nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
1463 1574
1464 nodsp [SH] Disable hardware DSP at boot time. 1575 nodsp [SH] Disable hardware DSP at boot time.
1465 1576
1466 noefi [X86-32,X86-64] Disable EFI runtime services support. 1577 noefi [X86] Disable EFI runtime services support.
1467 1578
1468 noexec [IA-64] 1579 noexec [IA-64]
1469 1580
1470 noexec [X86-32,X86-64] 1581 noexec [X86]
1471 On X86-32 available only on PAE configured kernels. 1582 On X86-32 available only on PAE configured kernels.
1472 noexec=on: enable non-executable mappings (default) 1583 noexec=on: enable non-executable mappings (default)
1473 noexec=off: disable non-executable mappings 1584 noexec=off: disable non-executable mappings
@@ -1485,9 +1596,13 @@ and is between 256 and 4096 characters. It is defined in the file
1485 register save and restore. The kernel will only save 1596 register save and restore. The kernel will only save
1486 legacy floating-point registers on task switch. 1597 legacy floating-point registers on task switch.
1487 1598
1488 noclflush [BUGS=X86] Don't use the CLFLUSH instruction 1599 noxsave [BUGS=X86] Disables x86 extended register state save
1600 and restore using xsave. The kernel will fallback to
1601 enabling legacy floating-point and sse state.
1489 1602
1490 nohlt [BUGS=ARM,SH] 1603 nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
1604 wfi(ARM) instruction doesn't work correctly and not to
1605 use it. This is also useful when using JTAG debugger.
1491 1606
1492 no-hlt [BUGS=X86-32] Tells the kernel that the hlt 1607 no-hlt [BUGS=X86-32] Tells the kernel that the hlt
1493 instruction doesn't work correctly and not to 1608 instruction doesn't work correctly and not to
@@ -1508,10 +1623,12 @@ and is between 256 and 4096 characters. It is defined in the file
1508 Valid arguments: on, off 1623 Valid arguments: on, off
1509 Default: on 1624 Default: on
1510 1625
1626 noiotrap [SH] Disables trapped I/O port accesses.
1627
1511 noirqdebug [X86-32] Disables the code which attempts to detect and 1628 noirqdebug [X86-32] Disables the code which attempts to detect and
1512 disable unhandled interrupt sources. 1629 disable unhandled interrupt sources.
1513 1630
1514 no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for 1631 no_timer_check [X86,APIC] Disables the code which tests for
1515 broken timer IRQ sources. 1632 broken timer IRQ sources.
1516 1633
1517 noisapnp [ISAPNP] Disables ISA PnP code. 1634 noisapnp [ISAPNP] Disables ISA PnP code.
@@ -1519,6 +1636,9 @@ and is between 256 and 4096 characters. It is defined in the file
1519 noinitrd [RAM] Tells the kernel not to load any configured 1636 noinitrd [RAM] Tells the kernel not to load any configured
1520 initial RAM disk. 1637 initial RAM disk.
1521 1638
1639 nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
1640 remapping.
1641
1522 nointroute [IA-64] 1642 nointroute [IA-64]
1523 1643
1524 nojitter [IA64] Disables jitter checking for ITC timers. 1644 nojitter [IA64] Disables jitter checking for ITC timers.
@@ -1527,12 +1647,6 @@ and is between 256 and 4096 characters. It is defined in the file
1527 1647
1528 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1648 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1529 1649
1530 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1531
1532 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
1533 default x2apic cluster mode on platforms
1534 supporting x2apic.
1535
1536 noltlbs [PPC] Do not use large page/tlb entries for kernel 1650 noltlbs [PPC] Do not use large page/tlb entries for kernel
1537 lowmem mapping on PPC40x. 1651 lowmem mapping on PPC40x.
1538 1652
@@ -1543,6 +1657,9 @@ and is between 256 and 4096 characters. It is defined in the file
1543 nomfgpt [X86-32] Disable Multi-Function General Purpose 1657 nomfgpt [X86-32] Disable Multi-Function General Purpose
1544 Timer usage (for AMD Geode machines). 1658 Timer usage (for AMD Geode machines).
1545 1659
1660 norandmaps Don't use address space randomization. Equivalent to
1661 echo 0 > /proc/sys/kernel/randomize_va_space
1662
1546 noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops 1663 noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
1547 1664
1548 noreplace-smp [X86-32,SMP] Don't replace SMP instructions 1665 noreplace-smp [X86-32,SMP] Don't replace SMP instructions
@@ -1567,7 +1684,7 @@ and is between 256 and 4096 characters. It is defined in the file
1567 nosoftlockup [KNL] Disable the soft-lockup detector. 1684 nosoftlockup [KNL] Disable the soft-lockup detector.
1568 1685
1569 noswapaccount [KNL] Disable accounting of swap in memory resource 1686 noswapaccount [KNL] Disable accounting of swap in memory resource
1570 controller. (See Documentation/controllers/memory.txt) 1687 controller. (See Documentation/cgroups/memory.txt)
1571 1688
1572 nosync [HW,M68K] Disables sync negotiation for all devices. 1689 nosync [HW,M68K] Disables sync negotiation for all devices.
1573 1690
@@ -1577,17 +1694,19 @@ and is between 256 and 4096 characters. It is defined in the file
1577 1694
1578 nowb [ARM] 1695 nowb [ARM]
1579 1696
1697 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1698
1580 nptcg= [IA64] Override max number of concurrent global TLB 1699 nptcg= [IA64] Override max number of concurrent global TLB
1581 purges which is reported from either PAL_VM_SUMMARY or 1700 purges which is reported from either PAL_VM_SUMMARY or
1582 SAL PALO. 1701 SAL PALO.
1583 1702
1703 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1704
1584 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. 1705 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
1585 one of ['zone', 'node', 'default'] can be specified 1706 one of ['zone', 'node', 'default'] can be specified
1586 This can be set from sysctl after boot. 1707 This can be set from sysctl after boot.
1587 See Documentation/sysctl/vm.txt for details. 1708 See Documentation/sysctl/vm.txt for details.
1588 1709
1589 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1590
1591 ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. 1710 ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
1592 See Documentation/debugging-via-ohci1394.txt for more 1711 See Documentation/debugging-via-ohci1394.txt for more
1593 info. 1712 info.
@@ -1605,6 +1724,14 @@ and is between 256 and 4096 characters. It is defined in the file
1605 oprofile.timer= [HW] 1724 oprofile.timer= [HW]
1606 Use timer interrupt instead of performance counters 1725 Use timer interrupt instead of performance counters
1607 1726
1727 oprofile.cpu_type= Force an oprofile cpu type
1728 This might be useful if you have an older oprofile
1729 userland or if you want common events.
1730 Format: { archperfmon }
1731 archperfmon: [X86] Force use of architectural
1732 perfmon on Intel CPUs instead of the
1733 CPU specific event set.
1734
1608 osst= [HW,SCSI] SCSI Tape Driver 1735 osst= [HW,SCSI] SCSI Tape Driver
1609 Format: <buffer_size>,<write_threshold> 1736 Format: <buffer_size>,<write_threshold>
1610 See also Documentation/scsi/st.txt. 1737 See also Documentation/scsi/st.txt.
@@ -1659,6 +1786,8 @@ and is between 256 and 4096 characters. It is defined in the file
1659 See also Documentation/blockdev/paride.txt. 1786 See also Documentation/blockdev/paride.txt.
1660 1787
1661 pci=option[,option...] [PCI] various PCI subsystem options: 1788 pci=option[,option...] [PCI] various PCI subsystem options:
1789 earlydump [X86] dump PCI config space before the kernel
1790 changes anything
1662 off [X86] don't probe for the PCI bus 1791 off [X86] don't probe for the PCI bus
1663 bios [X86-32] force use of PCI BIOS, don't access 1792 bios [X86-32] force use of PCI BIOS, don't access
1664 the hardware directly. Use this if your machine 1793 the hardware directly. Use this if your machine
@@ -1676,8 +1805,11 @@ and is between 256 and 4096 characters. It is defined in the file
1676 disable the use of PCIE advanced error reporting. 1805 disable the use of PCIE advanced error reporting.
1677 nodomains [PCI] Disable support for multiple PCI 1806 nodomains [PCI] Disable support for multiple PCI
1678 root domains (aka PCI segments, in ACPI-speak). 1807 root domains (aka PCI segments, in ACPI-speak).
1679 nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI 1808 nommconf [X86] Disable use of MMCONFIG for PCI
1680 Configuration 1809 Configuration
1810 check_enable_amd_mmconf [X86] check for and enable
1811 properly configured MMIO access to PCI
1812 config space on AMD family 10h CPU
1681 nomsi [MSI] If the PCI_MSI kernel config parameter is 1813 nomsi [MSI] If the PCI_MSI kernel config parameter is
1682 enabled, this kernel boot option can be used to 1814 enabled, this kernel boot option can be used to
1683 disable the use of MSI interrupts system-wide. 1815 disable the use of MSI interrupts system-wide.
@@ -1730,7 +1862,7 @@ and is between 256 and 4096 characters. It is defined in the file
1730 IRQ routing is enabled. 1862 IRQ routing is enabled.
1731 noacpi [X86] Do not use ACPI for IRQ routing 1863 noacpi [X86] Do not use ACPI for IRQ routing
1732 or for PCI scanning. 1864 or for PCI scanning.
1733 use_crs [X86] Use _CRS for PCI resource 1865 nocrs [X86] Don't use _CRS for PCI resource
1734 allocation. 1866 allocation.
1735 routeirq Do IRQ routing for all PCI devices. 1867 routeirq Do IRQ routing for all PCI devices.
1736 This is normally done in pci_enable_device(), 1868 This is normally done in pci_enable_device(),
@@ -1758,6 +1890,21 @@ and is between 256 and 4096 characters. It is defined in the file
1758 cbmemsize=nn[KMG] The fixed amount of bus space which is 1890 cbmemsize=nn[KMG] The fixed amount of bus space which is
1759 reserved for the CardBus bridge's memory 1891 reserved for the CardBus bridge's memory
1760 window. The default value is 64 megabytes. 1892 window. The default value is 64 megabytes.
1893 resource_alignment=
1894 Format:
1895 [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
1896 Specifies alignment and device to reassign
1897 aligned memory resources.
1898 If <order of align> is not specified,
1899 PAGE_SIZE is used as alignment.
1900 PCI-PCI bridge can be specified, if resource
1901 windows need to be expanded.
1902 ecrc= Enable/disable PCIe ECRC (transaction layer
1903 end-to-end CRC checking).
1904 bios: Use BIOS/firmware settings. This is the
1905 the default.
1906 off: Turn ECRC off
1907 on: Turn ECRC on.
1761 1908
1762 pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power 1909 pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
1763 Management. 1910 Management.
@@ -1816,10 +1963,11 @@ and is between 256 and 4096 characters. It is defined in the file
1816 autoconfiguration. 1963 autoconfiguration.
1817 Ranges are in pairs (memory base and size). 1964 Ranges are in pairs (memory base and size).
1818 1965
1819 dynamic_printk Enables pr_debug()/dev_dbg() calls if 1966 ports= [IP_VS_FTP] IPVS ftp helper module
1820 CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. 1967 Default is 21.
1821 These can also be switched on/off via 1968 Up to 8 (IP_VS_APP_MAX_PORTS) ports
1822 <debugfs>/dynamic_printk/modules 1969 may be specified.
1970 Format: <port>,<port>....
1823 1971
1824 print-fatal-signals= 1972 print-fatal-signals=
1825 [KNL] debug: print fatal signals 1973 [KNL] debug: print fatal signals
@@ -1830,6 +1978,14 @@ and is between 256 and 4096 characters. It is defined in the file
1830 printk.time= Show timing data prefixed to each printk message line 1978 printk.time= Show timing data prefixed to each printk message line
1831 Format: <bool> (1/Y/y=enable, 0/N/n=disable) 1979 Format: <bool> (1/Y/y=enable, 0/N/n=disable)
1832 1980
1981 processor.max_cstate= [HW,ACPI]
1982 Limit processor to maximum C-state
1983 max_cstate=9 overrides any DMI blacklist limit.
1984
1985 processor.nocst [HW,ACPI]
1986 Ignore the _CST method to determine C-states,
1987 instead using the legacy FADT method
1988
1833 profile= [KNL] Enable kernel profiling via /proc/profile 1989 profile= [KNL] Enable kernel profiling via /proc/profile
1834 Format: [schedule,]<number> 1990 Format: [schedule,]<number>
1835 Param: "schedule" - profile schedule points. 1991 Param: "schedule" - profile schedule points.
@@ -1839,14 +1995,6 @@ and is between 256 and 4096 characters. It is defined in the file
1839 Requires CONFIG_SCHEDSTATS 1995 Requires CONFIG_SCHEDSTATS
1840 Param: "kvm" - profile VM exits. 1996 Param: "kvm" - profile VM exits.
1841 1997
1842 processor.max_cstate= [HW,ACPI]
1843 Limit processor to maximum C-state
1844 max_cstate=9 overrides any DMI blacklist limit.
1845
1846 processor.nocst [HW,ACPI]
1847 Ignore the _CST method to determine C-states,
1848 instead using the legacy FADT method
1849
1850 prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk 1998 prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
1851 before loading. 1999 before loading.
1852 See Documentation/blockdev/ramdisk.txt. 2000 See Documentation/blockdev/ramdisk.txt.
@@ -1911,7 +2059,7 @@ and is between 256 and 4096 characters. It is defined in the file
1911 2059
1912 relax_domain_level= 2060 relax_domain_level=
1913 [KNL, SMP] Set scheduler's default relax_domain_level. 2061 [KNL, SMP] Set scheduler's default relax_domain_level.
1914 See Documentation/cpusets.txt. 2062 See Documentation/cgroups/cpusets.txt.
1915 2063
1916 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area 2064 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
1917 2065
@@ -2000,7 +2148,13 @@ and is between 256 and 4096 characters. It is defined in the file
2000 allowing boot to proceed. none ignores them, expecting 2148 allowing boot to proceed. none ignores them, expecting
2001 user space to do the scan. 2149 user space to do the scan.
2002 2150
2003 selinux [SELINUX] Disable or enable SELinux at boot time. 2151 security= [SECURITY] Choose a security module to enable at boot.
2152 If this boot parameter is not specified, only the first
2153 security module asking for security registration will be
2154 loaded. An invalid security module name will be treated
2155 as if no module has been chosen.
2156
2157 selinux= [SELINUX] Disable or enable SELinux at boot time.
2004 Format: { "0" | "1" } 2158 Format: { "0" | "1" }
2005 See security/selinux/Kconfig help text. 2159 See security/selinux/Kconfig help text.
2006 0 -- disable. 2160 0 -- disable.
@@ -2009,15 +2163,6 @@ and is between 256 and 4096 characters. It is defined in the file
2009 If enabled at boot time, /selinux/disable can be used 2163 If enabled at boot time, /selinux/disable can be used
2010 later to disable prior to initial policy load. 2164 later to disable prior to initial policy load.
2011 2165
2012 selinux_compat_net =
2013 [SELINUX] Set initial selinux_compat_net flag value.
2014 Format: { "0" | "1" }
2015 0 -- use new secmark-based packet controls
2016 1 -- use legacy packet controls
2017 Default value is 0 (preferred).
2018 Value can be changed at runtime via
2019 /selinux/compat_net.
2020
2021 serialnumber [BUGS=X86-32] 2166 serialnumber [BUGS=X86-32]
2022 2167
2023 shapers= [NET] 2168 shapers= [NET]
@@ -2329,6 +2474,8 @@ and is between 256 and 4096 characters. It is defined in the file
2329 2474
2330 tp720= [HW,PS2] 2475 tp720= [HW,PS2]
2331 2476
2477 trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
2478
2332 trix= [HW,OSS] MediaTrix AudioTrix Pro 2479 trix= [HW,OSS] MediaTrix AudioTrix Pro
2333 Format: 2480 Format:
2334 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> 2481 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
@@ -2364,7 +2511,7 @@ and is between 256 and 4096 characters. It is defined in the file
2364 reported either. 2511 reported either.
2365 2512
2366 unknown_nmi_panic 2513 unknown_nmi_panic
2367 [X86-32,X86-64] 2514 [X86]
2368 Set unknown_nmi_panic=1 early on boot. 2515 Set unknown_nmi_panic=1 early on boot.
2369 2516
2370 usbcore.autosuspend= 2517 usbcore.autosuspend=
@@ -2431,15 +2578,12 @@ and is between 256 and 4096 characters. It is defined in the file
2431 medium is write-protected). 2578 medium is write-protected).
2432 Example: quirks=0419:aaf5:rl,0421:0433:rc 2579 Example: quirks=0419:aaf5:rl,0421:0433:rc
2433 2580
2434 add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in 2581 vdso= [X86,SH]
2435 kernel's map of available physical RAM.
2436
2437 vdso= [X86-32,SH,x86-64]
2438 vdso=2: enable compat VDSO (default with COMPAT_VDSO) 2582 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
2439 vdso=1: enable VDSO (default) 2583 vdso=1: enable VDSO (default)
2440 vdso=0: disable VDSO mapping 2584 vdso=0: disable VDSO mapping
2441 2585
2442 vdso32= [X86-32,X86-64] 2586 vdso32= [X86]
2443 vdso32=2: enable compat VDSO (default with COMPAT_VDSO) 2587 vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
2444 vdso32=1: enable 32-bit VDSO (default) 2588 vdso32=1: enable 32-bit VDSO (default)
2445 vdso32=0: disable 32-bit VDSO mapping 2589 vdso32=0: disable 32-bit VDSO mapping
@@ -2472,6 +2616,31 @@ and is between 256 and 4096 characters. It is defined in the file
2472 vmpoff= [KNL,S390] Perform z/VM CP command after power off. 2616 vmpoff= [KNL,S390] Perform z/VM CP command after power off.
2473 Format: <command> 2617 Format: <command>
2474 2618
2619 vt.default_blu= [VT]
2620 Format: <blue0>,<blue1>,<blue2>,...,<blue15>
2621 Change the default blue palette of the console.
2622 This is a 16-member array composed of values
2623 ranging from 0-255.
2624
2625 vt.default_grn= [VT]
2626 Format: <green0>,<green1>,<green2>,...,<green15>
2627 Change the default green palette of the console.
2628 This is a 16-member array composed of values
2629 ranging from 0-255.
2630
2631 vt.default_red= [VT]
2632 Format: <red0>,<red1>,<red2>,...,<red15>
2633 Change the default red palette of the console.
2634 This is a 16-member array composed of values
2635 ranging from 0-255.
2636
2637 vt.default_utf8=
2638 [VT]
2639 Format=<0|1>
2640 Set system-wide default UTF-8 mode for all tty's.
2641 Default is 1, i.e. UTF-8 mode is enabled for all
2642 newly opened terminals.
2643
2475 waveartist= [HW,OSS] 2644 waveartist= [HW,OSS]
2476 Format: <io>,<irq>,<dma>,<dma2> 2645 Format: <io>,<irq>,<dma>,<dma2>
2477 2646
@@ -2484,6 +2653,10 @@ and is between 256 and 4096 characters. It is defined in the file
2484 wdt= [WDT] Watchdog 2653 wdt= [WDT] Watchdog
2485 See Documentation/watchdog/wdt.txt. 2654 See Documentation/watchdog/wdt.txt.
2486 2655
2656 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
2657 default x2apic cluster mode on platforms
2658 supporting x2apic.
2659
2487 xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. 2660 xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
2488 xd_geo= See header of drivers/block/xd.c. 2661 xd_geo= See header of drivers/block/xd.c.
2489 2662
@@ -2491,9 +2664,6 @@ and is between 256 and 4096 characters. It is defined in the file
2491 Format: 2664 Format:
2492 <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] 2665 <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
2493 2666
2494 norandmaps Don't use address space randomization. Equivalent to
2495 echo 0 > /proc/sys/kernel/randomize_va_space
2496
2497______________________________________________________________________ 2667______________________________________________________________________
2498 2668
2499TODO: 2669TODO:
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
new file mode 100644
index 000000000000..363044609dad
--- /dev/null
+++ b/Documentation/kmemcheck.txt
@@ -0,0 +1,773 @@
1GETTING STARTED WITH KMEMCHECK
2==============================
3
4Vegard Nossum <vegardno@ifi.uio.no>
5
6
7Contents
8========
90. Introduction
101. Downloading
112. Configuring and compiling
123. How to use
133.1. Booting
143.2. Run-time enable/disable
153.3. Debugging
163.4. Annotating false positives
174. Reporting errors
185. Technical description
19
20
210. Introduction
22===============
23
24kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
25is a dynamic checker that detects and warns about some uses of uninitialized
26memory.
27
28Userspace programmers might be familiar with Valgrind's memcheck. The main
29difference between memcheck and kmemcheck is that memcheck works for userspace
30programs only, and kmemcheck works for the kernel only. The implementations
31are of course vastly different. Because of this, kmemcheck is not as accurate
32as memcheck, but it turns out to be good enough in practice to discover real
33programmer errors that the compiler is not able to find through static
34analysis.
35
36Enabling kmemcheck on a kernel will probably slow it down to the extent that
37the machine will not be usable for normal workloads such as e.g. an
38interactive desktop. kmemcheck will also cause the kernel to use about twice
39as much memory as normal. For this reason, kmemcheck is strictly a debugging
40feature.
41
42
431. Downloading
44==============
45
46kmemcheck can only be downloaded using git. If you want to write patches
47against the current code, you should use the kmemcheck development branch of
48the tip tree. It is also possible to use the linux-next tree, which also
49includes the latest version of kmemcheck.
50
51Assuming that you've already cloned the linux-2.6.git repository, all you
52have to do is add the -tip tree as a remote, like this:
53
54 $ git remote add tip git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git
55
56To actually download the tree, fetch the remote:
57
58 $ git fetch tip
59
60And to check out a new local branch with the kmemcheck code:
61
62 $ git checkout -b kmemcheck tip/kmemcheck
63
64General instructions for the -tip tree can be found here:
65http://people.redhat.com/mingo/tip.git/readme.txt
66
67
682. Configuring and compiling
69============================
70
71kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
72configuration variables must have specific settings in order for the kmemcheck
73menu to even appear in "menuconfig". These are:
74
75 o CONFIG_CC_OPTIMIZE_FOR_SIZE=n
76
77 This option is located under "General setup" / "Optimize for size".
78
79 Without this, gcc will use certain optimizations that usually lead to
80 false positive warnings from kmemcheck. An example of this is a 16-bit
81 field in a struct, where gcc may load 32 bits, then discard the upper
82 16 bits. kmemcheck sees only the 32-bit load, and may trigger a
83 warning for the upper 16 bits (if they're uninitialized).
84
85 o CONFIG_SLAB=y or CONFIG_SLUB=y
86
87 This option is located under "General setup" / "Choose SLAB
88 allocator".
89
90 o CONFIG_FUNCTION_TRACER=n
91
92 This option is located under "Kernel hacking" / "Tracers" / "Kernel
93 Function Tracer"
94
95 When function tracing is compiled in, gcc emits a call to another
96 function at the beginning of every function. This means that when the
97 page fault handler is called, the ftrace framework will be called
98 before kmemcheck has had a chance to handle the fault. If ftrace then
99 modifies memory that was tracked by kmemcheck, the result is an
100 endless recursive page fault.
101
102 o CONFIG_DEBUG_PAGEALLOC=n
103
104 This option is located under "Kernel hacking" / "Debug page memory
105 allocations".
106
107In addition, I highly recommend turning on CONFIG_DEBUG_INFO=y. This is also
108located under "Kernel hacking". With this, you will be able to get line number
109information from the kmemcheck warnings, which is extremely valuable in
110debugging a problem. This option is not mandatory, however, because it slows
111down the compilation process and produces a much bigger kernel image.
112
113Now the kmemcheck menu should be visible (under "Kernel hacking" / "kmemcheck:
114trap use of uninitialized memory"). Here follows a description of the
115kmemcheck configuration variables:
116
117 o CONFIG_KMEMCHECK
118
119 This must be enabled in order to use kmemcheck at all...
120
121 o CONFIG_KMEMCHECK_[DISABLED | ENABLED | ONESHOT]_BY_DEFAULT
122
123 This option controls the status of kmemcheck at boot-time. "Enabled"
124 will enable kmemcheck right from the start, "disabled" will boot the
125 kernel as normal (but with the kmemcheck code compiled in, so it can
126 be enabled at run-time after the kernel has booted), and "one-shot" is
127 a special mode which will turn kmemcheck off automatically after
128 detecting the first use of uninitialized memory.
129
130 If you are using kmemcheck to actively debug a problem, then you
131 probably want to choose "enabled" here.
132
133 The one-shot mode is mostly useful in automated test setups because it
134 can prevent floods of warnings and increase the chances of the machine
135 surviving in case something is really wrong. In other cases, the one-
136 shot mode could actually be counter-productive because it would turn
137 itself off at the very first error -- in the case of a false positive
138 too -- and this would come in the way of debugging the specific
139 problem you were interested in.
140
141 If you would like to use your kernel as normal, but with a chance to
142 enable kmemcheck in case of some problem, it might be a good idea to
143 choose "disabled" here. When kmemcheck is disabled, most of the run-
144 time overhead is not incurred, and the kernel will be almost as fast
145 as normal.
146
147 o CONFIG_KMEMCHECK_QUEUE_SIZE
148
149 Select the maximum number of error reports to store in an internal
150 (fixed-size) buffer. Since errors can occur virtually anywhere and in
151 any context, we need a temporary storage area which is guaranteed not
152 to generate any other page faults when accessed. The queue will be
153 emptied as soon as a tasklet may be scheduled. If the queue is full,
154 new error reports will be lost.
155
156 The default value of 64 is probably fine. If some code produces more
157 than 64 errors within an irqs-off section, then the code is likely to
158 produce many, many more, too, and these additional reports seldom give
159 any more information (the first report is usually the most valuable
160 anyway).
161
162 This number might have to be adjusted if you are not using serial
163 console or similar to capture the kernel log. If you are using the
164 "dmesg" command to save the log, then getting a lot of kmemcheck
165 warnings might overflow the kernel log itself, and the earlier reports
166 will get lost in that way instead. Try setting this to 10 or so on
167 such a setup.
168
169 o CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT
170
171 Select the number of shadow bytes to save along with each entry of the
172 error-report queue. These bytes indicate what parts of an allocation
173 are initialized, uninitialized, etc. and will be displayed when an
174 error is detected to help the debugging of a particular problem.
175
176 The number entered here is actually the logarithm of the number of
177 bytes that will be saved. So if you pick for example 5 here, kmemcheck
178 will save 2^5 = 32 bytes.
179
180 The default value should be fine for debugging most problems. It also
181 fits nicely within 80 columns.
182
183 o CONFIG_KMEMCHECK_PARTIAL_OK
184
185 This option (when enabled) works around certain GCC optimizations that
186 produce 32-bit reads from 16-bit variables where the upper 16 bits are
187 thrown away afterwards.
188
189 The default value (enabled) is recommended. This may of course hide
190 some real errors, but disabling it would probably produce a lot of
191 false positives.
192
193 o CONFIG_KMEMCHECK_BITOPS_OK
194
195 This option silences warnings that would be generated for bit-field
196 accesses where not all the bits are initialized at the same time. This
197 may also hide some real bugs.
198
199 This option is probably obsolete, or it should be replaced with
200 the kmemcheck-/bitfield-annotations for the code in question. The
201 default value is therefore fine.
202
203Now compile the kernel as usual.
204
205
2063. How to use
207=============
208
2093.1. Booting
210============
211
212First some information about the command-line options. There is only one
213option specific to kmemcheck, and this is called "kmemcheck". It can be used
214to override the default mode as chosen by the CONFIG_KMEMCHECK_*_BY_DEFAULT
215option. Its possible settings are:
216
217 o kmemcheck=0 (disabled)
218 o kmemcheck=1 (enabled)
219 o kmemcheck=2 (one-shot mode)
220
221If SLUB debugging has been enabled in the kernel, it may take precedence over
222kmemcheck in such a way that the slab caches which are under SLUB debugging
223will not be tracked by kmemcheck. In order to ensure that this doesn't happen
224(even though it shouldn't by default), use SLUB's boot option "slub_debug",
225like this: slub_debug=-
226
227In fact, this option may also be used for fine-grained control over SLUB vs.
228kmemcheck. For example, if the command line includes "kmemcheck=1
229slub_debug=,dentry", then SLUB debugging will be used only for the "dentry"
230slab cache, and with kmemcheck tracking all the other caches. This is advanced
231usage, however, and is not generally recommended.
232
233
2343.2. Run-time enable/disable
235============================
236
237When the kernel has booted, it is possible to enable or disable kmemcheck at
238run-time. WARNING: This feature is still experimental and may cause false
239positive warnings to appear. Therefore, try not to use this. If you find that
240it doesn't work properly (e.g. you see an unreasonable amount of warnings), I
241will be happy to take bug reports.
242
243Use the file /proc/sys/kernel/kmemcheck for this purpose, e.g.:
244
245 $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
246
247The numbers are the same as for the kmemcheck= command-line option.
248
249
2503.3. Debugging
251==============
252
253A typical report will look something like this:
254
255WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
25680000000000000000000000000000000000000000088ffff0000000000000000
257 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
258 ^
259
260Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
261RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
262RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002
263RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
264RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
265RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
266R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
267R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
268FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
269CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
270CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
271DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
272DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
273 [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
274 [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
275 [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
276 [<ffffffff8100c7b5>] int_signal+0x12/0x17
277 [<ffffffffffffffff>] 0xffffffffffffffff
278
279The single most valuable information in this report is the RIP (or EIP on 32-
280bit) value. This will help us pinpoint exactly which instruction that caused
281the warning.
282
283If your kernel was compiled with CONFIG_DEBUG_INFO=y, then all we have to do
284is give this address to the addr2line program, like this:
285
286 $ addr2line -e vmlinux -i ffffffff8104ede8
287 arch/x86/include/asm/string_64.h:12
288 include/asm-generic/siginfo.h:287
289 kernel/signal.c:380
290 kernel/signal.c:410
291
292The "-e vmlinux" tells addr2line which file to look in. IMPORTANT: This must
293be the vmlinux of the kernel that produced the warning in the first place! If
294not, the line number information will almost certainly be wrong.
295
296The "-i" tells addr2line to also print the line numbers of inlined functions.
297In this case, the flag was very important, because otherwise, it would only
298have printed the first line, which is just a call to memcpy(), which could be
299called from a thousand places in the kernel, and is therefore not very useful.
300These inlined functions would not show up in the stack trace above, simply
301because the kernel doesn't load the extra debugging information. This
302technique can of course be used with ordinary kernel oopses as well.
303
304In this case, it's the caller of memcpy() that is interesting, and it can be
305found in include/asm-generic/siginfo.h, line 287:
306
307281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
308282 {
309283 if (from->si_code < 0)
310284 memcpy(to, from, sizeof(*to));
311285 else
312286 /* _sigchld is currently the largest know union member */
313287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
314288 }
315
316Since this was a read (kmemcheck usually warns about reads only, though it can
317warn about writes to unallocated or freed memory as well), it was probably the
318"from" argument which contained some uninitialized bytes. Following the chain
319of calls, we move upwards to see where "from" was allocated or initialized,
320kernel/signal.c, line 380:
321
322359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
323360 {
324...
325367 list_for_each_entry(q, &list->list, list) {
326368 if (q->info.si_signo == sig) {
327369 if (first)
328370 goto still_pending;
329371 first = q;
330...
331377 if (first) {
332378 still_pending:
333379 list_del_init(&first->list);
334380 copy_siginfo(info, &first->info);
335381 __sigqueue_free(first);
336...
337392 }
338393 }
339
340Here, it is &first->info that is being passed on to copy_siginfo(). The
341variable "first" was found on a list -- passed in as the second argument to
342collect_signal(). We continue our journey through the stack, to figure out
343where the item on "list" was allocated or initialized. We move to line 410:
344
345395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
346396 siginfo_t *info)
347397 {
348...
349410 collect_signal(sig, pending, info);
350...
351414 }
352
353Now we need to follow the "pending" pointer, since that is being passed on to
354collect_signal() as "list". At this point, we've run out of lines from the
355"addr2line" output. Not to worry, we just paste the next addresses from the
356kmemcheck stack dump, i.e.:
357
358 [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
359 [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
360 [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
361 [<ffffffff8100c7b5>] int_signal+0x12/0x17
362
363 $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
364 ffffffff8100b87d ffffffff8100c7b5
365 kernel/signal.c:446
366 kernel/signal.c:1806
367 arch/x86/kernel/signal.c:805
368 arch/x86/kernel/signal.c:871
369 arch/x86/kernel/entry_64.S:694
370
371Remember that since these addresses were found on the stack and not as the
372RIP value, they actually point to the _next_ instruction (they are return
373addresses). This becomes obvious when we look at the code for line 446:
374
375422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
376423 {
377...
378431 signr = __dequeue_signal(&tsk->signal->shared_pending,
379432 mask, info);
380433 /*
381434 * itimer signal ?
382435 *
383436 * itimers are process shared and we restart periodic
384437 * itimers in the signal delivery path to prevent DoS
385438 * attacks in the high resolution timer case. This is
386439 * compliant with the old way of self restarting
387440 * itimers, as the SIGALRM is a legacy signal and only
388441 * queued once. Changing the restart behaviour to
389442 * restart the timer in the signal dequeue path is
390443 * reducing the timer noise on heavy loaded !highres
391444 * systems too.
392445 */
393446 if (unlikely(signr == SIGALRM)) {
394...
395489 }
396
397So instead of looking at 446, we should be looking at 431, which is the line
398that executes just before 446. Here we see that what we are looking for is
399&tsk->signal->shared_pending.
400
401Our next task is now to figure out which function that puts items on this
402"shared_pending" list. A crude, but efficient tool, is git grep:
403
404 $ git grep -n 'shared_pending' kernel/
405 ...
406 kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending;
407 kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending;
408 ...
409
410There were more results, but none of them were related to list operations,
411and these were the only assignments. We inspect the line numbers more closely
412and find that this is indeed where items are being added to the list:
413
414816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
415817 int group)
416818 {
417...
418828 pending = group ? &t->signal->shared_pending : &t->pending;
419...
420851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
421852 (is_si_special(info) ||
422853 info->si_code >= 0)));
423854 if (q) {
424855 list_add_tail(&q->list, &pending->list);
425...
426890 }
427
428and:
429
4301309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
4311310 {
432....
4331339 pending = group ? &t->signal->shared_pending : &t->pending;
4341340 list_add_tail(&q->list, &pending->list);
435....
4361347 }
437
438In the first case, the list element we are looking for, "q", is being returned
439from the function __sigqueue_alloc(), which looks like an allocation function.
440Let's take a look at it:
441
442187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
443188 int override_rlimit)
444189 {
445190 struct sigqueue *q = NULL;
446191 struct user_struct *user;
447192
448193 /*
449194 * We won't get problems with the target's UID changing under us
450195 * because changing it requires RCU be used, and if t != current, the
451196 * caller must be holding the RCU readlock (by way of a spinlock) and
452197 * we use RCU protection here
453198 */
454199 user = get_uid(__task_cred(t)->user);
455200 atomic_inc(&user->sigpending);
456201 if (override_rlimit ||
457202 atomic_read(&user->sigpending) <=
458203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
459204 q = kmem_cache_alloc(sigqueue_cachep, flags);
460205 if (unlikely(q == NULL)) {
461206 atomic_dec(&user->sigpending);
462207 free_uid(user);
463208 } else {
464209 INIT_LIST_HEAD(&q->list);
465210 q->flags = 0;
466211 q->user = user;
467212 }
468213
469214 return q;
470215 }
471
472We see that this function initializes q->list, q->flags, and q->user. It seems
473that now is the time to look at the definition of "struct sigqueue", e.g.:
474
47514 struct sigqueue {
47615 struct list_head list;
47716 int flags;
47817 siginfo_t info;
47918 struct user_struct *user;
48019 };
481
482And, you might remember, it was a memcpy() on &first->info that caused the
483warning, so this makes perfect sense. It also seems reasonable to assume that
484it is the caller of __sigqueue_alloc() that has the responsibility of filling
485out (initializing) this member.
486
487But just which fields of the struct were uninitialized? Let's look at
488kmemcheck's report again:
489
490WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
49180000000000000000000000000000000000000000088ffff0000000000000000
492 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
493 ^
494
495These first two lines are the memory dump of the memory object itself, and the
496shadow bytemap, respectively. The memory object itself is in this case
497&first->info. Just beware that the start of this dump is NOT the start of the
498object itself! The position of the caret (^) corresponds with the address of
499the read (ffff88003e4a2024).
500
501The shadow bytemap dump legend is as follows:
502
503 i - initialized
504 u - uninitialized
505 a - unallocated (memory has been allocated by the slab layer, but has not
506 yet been handed off to anybody)
507 f - freed (memory has been allocated by the slab layer, but has been freed
508 by the previous owner)
509
510In order to figure out where (relative to the start of the object) the
511uninitialized memory was located, we have to look at the disassembly. For
512that, we'll need the RIP address again:
513
514RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
515
516 $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
517 ffffffff8104edc8: mov %r8,0x8(%r8)
518 ffffffff8104edcc: test %r10d,%r10d
519 ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168>
520 ffffffff8104edd5: mov %rax,%rdx
521 ffffffff8104edd8: mov $0xc,%ecx
522 ffffffff8104eddd: mov %r13,%rdi
523 ffffffff8104ede0: mov $0x30,%eax
524 ffffffff8104ede5: mov %rdx,%rsi
525 ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi)
526 ffffffff8104edea: test $0x2,%al
527 ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0>
528 ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi)
529 ffffffff8104edf0: test $0x1,%al
530 ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5>
531 ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi)
532 ffffffff8104edf5: mov %r8,%rdi
533 ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free>
534
535As expected, it's the "rep movsl" instruction from the memcpy() that causes
536the warning. We know about REP MOVSL that it uses the register RCX to count
537the number of remaining iterations. By taking a look at the register dump
538again (from the kmemcheck report), we can figure out how many bytes were left
539to copy:
540
541RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
542
543By looking at the disassembly, we also see that %ecx is being loaded with the
544value $0xc just before (ffffffff8104edd8), so we are very lucky. Keep in mind
545that this is the number of iterations, not bytes. And since this is a "long"
546operation, we need to multiply by 4 to get the number of bytes. So this means
547that the uninitialized value was encountered at 4 * (0xc - 0x9) = 12 bytes
548from the start of the object.
549
550We can now try to figure out which field of the "struct siginfo" that was not
551initialized. This is the beginning of the struct:
552
55340 typedef struct siginfo {
55441 int si_signo;
55542 int si_errno;
55643 int si_code;
55744
55845 union {
559..
56092 } _sifields;
56193 } siginfo_t;
562
563On 64-bit, the int is 4 bytes long, so it must the the union member that has
564not been initialized. We can verify this using gdb:
565
566 $ gdb vmlinux
567 ...
568 (gdb) p &((struct siginfo *) 0)->_sifields
569 $1 = (union {...} *) 0x10
570
571Actually, it seems that the union member is located at offset 0x10 -- which
572means that gcc has inserted 4 bytes of padding between the members si_code
573and _sifields. We can now get a fuller picture of the memory dump:
574
575 _----------------------------=> si_code
576 / _--------------------=> (padding)
577 | / _------------=> _sifields(._kill._pid)
578 | | / _----=> _sifields(._kill._uid)
579 | | | /
580-------|-------|-------|-------|
58180000000000000000000000000000000000000000088ffff0000000000000000
582 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
583
584This allows us to realize another important fact: si_code contains the value
5850x80. Remember that x86 is little endian, so the first 4 bytes "80000000" are
586really the number 0x00000080. With a bit of research, we find that this is
587actually the constant SI_KERNEL defined in include/asm-generic/siginfo.h:
588
589144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
590
591This macro is used in exactly one place in the x86 kernel: In send_signal()
592in kernel/signal.c:
593
594816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
595817 int group)
596818 {
597...
598828 pending = group ? &t->signal->shared_pending : &t->pending;
599...
600851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
601852 (is_si_special(info) ||
602853 info->si_code >= 0)));
603854 if (q) {
604855 list_add_tail(&q->list, &pending->list);
605856 switch ((unsigned long) info) {
606...
607865 case (unsigned long) SEND_SIG_PRIV:
608866 q->info.si_signo = sig;
609867 q->info.si_errno = 0;
610868 q->info.si_code = SI_KERNEL;
611869 q->info.si_pid = 0;
612870 q->info.si_uid = 0;
613871 break;
614...
615890 }
616
617Not only does this match with the .si_code member, it also matches the place
618we found earlier when looking for where siginfo_t objects are enqueued on the
619"shared_pending" list.
620
621So to sum up: It seems that it is the padding introduced by the compiler
622between two struct fields that is uninitialized, and this gets reported when
623we do a memcpy() on the struct. This means that we have identified a false
624positive warning.
625
626Normally, kmemcheck will not report uninitialized accesses in memcpy() calls
627when both the source and destination addresses are tracked. (Instead, we copy
628the shadow bytemap as well). In this case, the destination address clearly
629was not tracked. We can dig a little deeper into the stack trace from above:
630
631 arch/x86/kernel/signal.c:805
632 arch/x86/kernel/signal.c:871
633 arch/x86/kernel/entry_64.S:694
634
635And we clearly see that the destination siginfo object is located on the
636stack:
637
638782 static void do_signal(struct pt_regs *regs)
639783 {
640784 struct k_sigaction ka;
641785 siginfo_t info;
642...
643804 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
644...
645854 }
646
647And this &info is what eventually gets passed to copy_siginfo() as the
648destination argument.
649
650Now, even though we didn't find an actual error here, the example is still a
651good one, because it shows how one would go about to find out what the report
652was all about.
653
654
6553.4. Annotating false positives
656===============================
657
658There are a few different ways to make annotations in the source code that
659will keep kmemcheck from checking and reporting certain allocations. Here
660they are:
661
662 o __GFP_NOTRACK_FALSE_POSITIVE
663
664 This flag can be passed to kmalloc() or kmem_cache_alloc() (therefore
665 also to other functions that end up calling one of these) to indicate
666 that the allocation should not be tracked because it would lead to
667 a false positive report. This is a "big hammer" way of silencing
668 kmemcheck; after all, even if the false positive pertains to
669 particular field in a struct, for example, we will now lose the
670 ability to find (real) errors in other parts of the same struct.
671
672 Example:
673
674 /* No warnings will ever trigger on accessing any part of x */
675 x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
676
677 o kmemcheck_bitfield_begin(name)/kmemcheck_bitfield_end(name) and
678 kmemcheck_annotate_bitfield(ptr, name)
679
680 The first two of these three macros can be used inside struct
681 definitions to signal, respectively, the beginning and end of a
682 bitfield. Additionally, this will assign the bitfield a name, which
683 is given as an argument to the macros.
684
685 Having used these markers, one can later use
686 kmemcheck_annotate_bitfield() at the point of allocation, to indicate
687 which parts of the allocation is part of a bitfield.
688
689 Example:
690
691 struct foo {
692 int x;
693
694 kmemcheck_bitfield_begin(flags);
695 int flag_a:1;
696 int flag_b:1;
697 kmemcheck_bitfield_end(flags);
698
699 int y;
700 };
701
702 struct foo *x = kmalloc(sizeof *x);
703
704 /* No warnings will trigger on accessing the bitfield of x */
705 kmemcheck_annotate_bitfield(x, flags);
706
707 Note that kmemcheck_annotate_bitfield() can be used even before the
708 return value of kmalloc() is checked -- in other words, passing NULL
709 as the first argument is legal (and will do nothing).
710
711
7124. Reporting errors
713===================
714
715As we have seen, kmemcheck will produce false positive reports. Therefore, it
716is not very wise to blindly post kmemcheck warnings to mailing lists and
717maintainers. Instead, I encourage maintainers and developers to find errors
718in their own code. If you get a warning, you can try to work around it, try
719to figure out if it's a real error or not, or simply ignore it. Most
720developers know their own code and will quickly and efficiently determine the
721root cause of a kmemcheck report. This is therefore also the most efficient
722way to work with kmemcheck.
723
724That said, we (the kmemcheck maintainers) will always be on the lookout for
725false positives that we can annotate and silence. So whatever you find,
726please drop us a note privately! Kernel configs and steps to reproduce (if
727available) are of course a great help too.
728
729Happy hacking!
730
731
7325. Technical description
733========================
734
735kmemcheck works by marking memory pages non-present. This means that whenever
736somebody attempts to access the page, a page fault is generated. The page
737fault handler notices that the page was in fact only hidden, and so it calls
738on the kmemcheck code to make further investigations.
739
740When the investigations are completed, kmemcheck "shows" the page by marking
741it present (as it would be under normal circumstances). This way, the
742interrupted code can continue as usual.
743
744But after the instruction has been executed, we should hide the page again, so
745that we can catch the next access too! Now kmemcheck makes use of a debugging
746feature of the processor, namely single-stepping. When the processor has
747finished the one instruction that generated the memory access, a debug
748exception is raised. From here, we simply hide the page again and continue
749execution, this time with the single-stepping feature turned off.
750
751kmemcheck requires some assistance from the memory allocator in order to work.
752The memory allocator needs to
753
754 1. Tell kmemcheck about newly allocated pages and pages that are about to
755 be freed. This allows kmemcheck to set up and tear down the shadow memory
756 for the pages in question. The shadow memory stores the status of each
757 byte in the allocation proper, e.g. whether it is initialized or
758 uninitialized.
759
760 2. Tell kmemcheck which parts of memory should be marked uninitialized.
761 There are actually a few more states, such as "not yet allocated" and
762 "recently freed".
763
764If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
765memory that can take page faults because of kmemcheck.
766
767If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
768request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
769This does not prevent the page faults from occurring, however, but marks the
770object in question as being initialized so that no warnings will ever be
771produced for this object.
772
773Currently, the SLAB and SLUB allocators are supported by kmemcheck.
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 000000000000..0112da3b9ab8
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,142 @@
1Kernel Memory Leak Detector
2===========================
3
4Introduction
5------------
6
7Kmemleak provides a way of detecting possible kernel memory leaks in a
8way similar to a tracing garbage collector
9(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
10with the difference that the orphan objects are not freed but only
11reported via /sys/kernel/debug/kmemleak. A similar method is used by the
12Valgrind tool (memcheck --leak-check) to detect the memory leaks in
13user-space applications.
14
15Usage
16-----
17
18CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
19thread scans the memory every 10 minutes (by default) and prints any new
20unreferenced objects found. To trigger an intermediate scan and display
21all the possible memory leaks:
22
23 # mount -t debugfs nodev /sys/kernel/debug/
24 # cat /sys/kernel/debug/kmemleak
25
26Note that the orphan objects are listed in the order they were allocated
27and one object at the beginning of the list may cause other subsequent
28objects to be reported as orphan.
29
30Memory scanning parameters can be modified at run-time by writing to the
31/sys/kernel/debug/kmemleak file. The following parameters are supported:
32
33 off - disable kmemleak (irreversible)
34 stack=on - enable the task stacks scanning
35 stack=off - disable the tasks stacks scanning
36 scan=on - start the automatic memory scanning thread
37 scan=off - stop the automatic memory scanning thread
38 scan=<secs> - set the automatic memory scanning period in seconds (0
39 to disable it)
40
41Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
42the kernel command line.
43
44Basic Algorithm
45---------------
46
47The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
48friends are traced and the pointers, together with additional
49information like size and stack trace, are stored in a prio search tree.
50The corresponding freeing function calls are tracked and the pointers
51removed from the kmemleak data structures.
52
53An allocated block of memory is considered orphan if no pointer to its
54start address or to any location inside the block can be found by
55scanning the memory (including saved registers). This means that there
56might be no way for the kernel to pass the address of the allocated
57block to a freeing function and therefore the block is considered a
58memory leak.
59
60The scanning algorithm steps:
61
62 1. mark all objects as white (remaining white objects will later be
63 considered orphan)
64 2. scan the memory starting with the data section and stacks, checking
65 the values against the addresses stored in the prio search tree. If
66 a pointer to a white object is found, the object is added to the
67 gray list
68 3. scan the gray objects for matching addresses (some white objects
69 can become gray and added at the end of the gray list) until the
70 gray set is finished
71 4. the remaining white objects are considered orphan and reported via
72 /sys/kernel/debug/kmemleak
73
74Some allocated memory blocks have pointers stored in the kernel's
75internal data structures and they cannot be detected as orphans. To
76avoid this, kmemleak can also store the number of values pointing to an
77address inside the block address range that need to be found so that the
78block is not considered a leak. One example is __vmalloc().
79
80Kmemleak API
81------------
82
83See the include/linux/kmemleak.h header for the functions prototype.
84
85kmemleak_init - initialize kmemleak
86kmemleak_alloc - notify of a memory block allocation
87kmemleak_free - notify of a memory block freeing
88kmemleak_not_leak - mark an object as not a leak
89kmemleak_ignore - do not scan or report an object as leak
90kmemleak_scan_area - add scan areas inside a memory block
91kmemleak_no_scan - do not scan a memory block
92kmemleak_erase - erase an old value in a pointer variable
93kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
94kmemleak_free_recursive - as kmemleak_free but checks the recursiveness
95
96Dealing with false positives/negatives
97--------------------------------------
98
99The false negatives are real memory leaks (orphan objects) but not
100reported by kmemleak because values found during the memory scanning
101point to such objects. To reduce the number of false negatives, kmemleak
102provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and
103kmemleak_erase functions (see above). The task stacks also increase the
104amount of false negatives and their scanning is not enabled by default.
105
106The false positives are objects wrongly reported as being memory leaks
107(orphan). For objects known not to be leaks, kmemleak provides the
108kmemleak_not_leak function. The kmemleak_ignore could also be used if
109the memory block is known not to contain other pointers and it will no
110longer be scanned.
111
112Some of the reported leaks are only transient, especially on SMP
113systems, because of pointers temporarily stored in CPU registers or
114stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing
115the minimum age of an object to be reported as a memory leak.
116
117Limitations and Drawbacks
118-------------------------
119
120The main drawback is the reduced performance of memory allocation and
121freeing. To avoid other penalties, the memory scanning is only performed
122when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is
123intended for debugging purposes where the performance might not be the
124most important requirement.
125
126To keep the algorithm simple, kmemleak scans for values pointing to any
127address inside a block's address range. This may lead to an increased
128number of false negatives. However, it is likely that a real memory leak
129will eventually become visible.
130
131Another source of false negatives is the data stored in non-pointer
132values. In a future version, kmemleak could only scan the pointer
133members in the allocated structures. This feature would solve many of
134the false negative cases described above.
135
136The tool can report false positives. These are cases where an allocated
137block doesn't need to be freed (some cases in the init_call functions),
138the pointer is calculated by other methods than the usual container_of
139macro or the pointer is stored in a location not scanned by kmemleak.
140
141Page allocations and ioremap are not tracked. Only the ARM and x86
142architectures are currently supported.
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index b2e374586bd8..c79ab996dada 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -132,7 +132,7 @@ kobject_name():
132 const char *kobject_name(const struct kobject * kobj); 132 const char *kobject_name(const struct kobject * kobj);
133 133
134There is a helper function to both initialize and add the kobject to the 134There is a helper function to both initialize and add the kobject to the
135kernel at the same time, called supprisingly enough kobject_init_and_add(): 135kernel at the same time, called surprisingly enough kobject_init_and_add():
136 136
137 int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 137 int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
138 struct kobject *parent, const char *fmt, ...); 138 struct kobject *parent, const char *fmt, ...);
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 48b3de90eb1e..053037a1fe6d 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -212,7 +212,9 @@ hit, Kprobes calls kp->pre_handler. After the probed instruction
212is single-stepped, Kprobe calls kp->post_handler. If a fault 212is single-stepped, Kprobe calls kp->post_handler. If a fault
213occurs during execution of kp->pre_handler or kp->post_handler, 213occurs during execution of kp->pre_handler or kp->post_handler,
214or during single-stepping of the probed instruction, Kprobes calls 214or during single-stepping of the probed instruction, Kprobes calls
215kp->fault_handler. Any or all handlers can be NULL. 215kp->fault_handler. Any or all handlers can be NULL. If kp->flags
216is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
217so, it's handlers aren't hit until calling enable_kprobe(kp).
216 218
217NOTE: 219NOTE:
2181. With the introduction of the "symbol_name" field to struct kprobe, 2201. With the introduction of the "symbol_name" field to struct kprobe,
@@ -363,6 +365,26 @@ probes) in the specified array, they clear the addr field of those
363incorrect probes. However, other probes in the array are 365incorrect probes. However, other probes in the array are
364unregistered correctly. 366unregistered correctly.
365 367
3684.7 disable_*probe
369
370#include <linux/kprobes.h>
371int disable_kprobe(struct kprobe *kp);
372int disable_kretprobe(struct kretprobe *rp);
373int disable_jprobe(struct jprobe *jp);
374
375Temporarily disables the specified *probe. You can enable it again by using
376enable_*probe(). You must specify the probe which has been registered.
377
3784.8 enable_*probe
379
380#include <linux/kprobes.h>
381int enable_kprobe(struct kprobe *kp);
382int enable_kretprobe(struct kretprobe *rp);
383int enable_jprobe(struct jprobe *jp);
384
385Enables *probe which has been disabled by disable_*probe(). You must specify
386the probe which has been registered.
387
3665. Kprobes Features and Limitations 3885. Kprobes Features and Limitations
367 389
368Kprobes allows multiple probes at the same address. Currently, 390Kprobes allows multiple probes at the same address. Currently,
@@ -485,9 +507,9 @@ http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
485Appendix A: The kprobes debugfs interface 507Appendix A: The kprobes debugfs interface
486 508
487With recent kernels (> 2.6.20) the list of registered kprobes is visible 509With recent kernels (> 2.6.20) the list of registered kprobes is visible
488under the /debug/kprobes/ directory (assuming debugfs is mounted at /debug). 510under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug).
489 511
490/debug/kprobes/list: Lists all registered probes on the system 512/sys/kernel/debug/kprobes/list: Lists all registered probes on the system
491 513
492c015d71a k vfs_read+0x0 514c015d71a k vfs_read+0x0
493c011a316 j do_fork+0x0 515c011a316 j do_fork+0x0
@@ -500,10 +522,14 @@ the probe. If the probed function belongs to a module, the module name
500is also specified. Following columns show probe status. If the probe is on 522is also specified. Following columns show probe status. If the probe is on
501a virtual address that is no longer valid (module init sections, module 523a virtual address that is no longer valid (module init sections, module
502virtual addresses that correspond to modules that've been unloaded), 524virtual addresses that correspond to modules that've been unloaded),
503such probes are marked with [GONE]. 525such probes are marked with [GONE]. If the probe is temporarily disabled,
526such probes are marked with [DISABLED].
504 527
505/debug/kprobes/enabled: Turn kprobes ON/OFF 528/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
506 529
507Provides a knob to globally turn registered kprobes ON or OFF. By default, 530Provides a knob to globally and forcibly turn registered kprobes ON or OFF.
508all kprobes are enabled. By echoing "0" to this file, all registered probes 531By default, all kprobes are enabled. By echoing "0" to this file, all
509will be disarmed, till such time a "1" is echoed to this file. 532registered probes will be disarmed, till such time a "1" is echoed to this
533file. Note that this knob just disarms and arms all kprobes and doesn't
534change each probe's disabling state. This means that disabled kprobes (marked
535[DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt
index 2b3a6b5260bf..0768fcc3ba3e 100644
--- a/Documentation/laptops/acer-wmi.txt
+++ b/Documentation/laptops/acer-wmi.txt
@@ -1,9 +1,9 @@
1Acer Laptop WMI Extras Driver 1Acer Laptop WMI Extras Driver
2http://code.google.com/p/aceracpi 2http://code.google.com/p/aceracpi
3Version 0.2 3Version 0.3
418th August 2008 44th April 2009
5 5
6Copyright 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk> 6Copyright 2007-2009 Carlos Corbacho <carlos@strangeworlds.co.uk>
7 7
8acer-wmi is a driver to allow you to control various parts of your Acer laptop 8acer-wmi is a driver to allow you to control various parts of your Acer laptop
9hardware under Linux which are exposed via ACPI-WMI. 9hardware under Linux which are exposed via ACPI-WMI.
@@ -36,7 +36,11 @@ not possible in kernel space from a 64 bit OS.
36Supported Hardware 36Supported Hardware
37****************** 37******************
38 38
39Please see the website for the current list of known working hardare: 39NOTE: The Acer Aspire One is not supported hardware. It cannot work with
40acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been
41blacklisted until that happens.
42
43Please see the website for the current list of known working hardware:
40 44
41http://code.google.com/p/aceracpi/wiki/SupportedHardware 45http://code.google.com/p/aceracpi/wiki/SupportedHardware
42 46
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
index 8b2bc1572d98..23ce7d350d1a 100644
--- a/Documentation/laptops/sony-laptop.txt
+++ b/Documentation/laptops/sony-laptop.txt
@@ -22,7 +22,7 @@ If your laptop model supports it, you will find sysfs files in the
22/sys/class/backlight/sony/ 22/sys/class/backlight/sony/
23directory. You will be able to query and set the current screen 23directory. You will be able to query and set the current screen
24brightness: 24brightness:
25 brightness get/set screen brightness (an iteger 25 brightness get/set screen brightness (an integer
26 between 0 and 7) 26 between 0 and 7)
27 actual_brightness reading from this file will query the HW 27 actual_brightness reading from this file will query the HW
28 to get real brightness value 28 to get real brightness value
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 41bc99fa1884..78e354b42f67 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
1 ThinkPad ACPI Extras Driver 1 ThinkPad ACPI Extras Driver
2 2
3 Version 0.22 3 Version 0.23
4 November 23rd, 2008 4 April 10th, 2009
5 5
6 Borislav Deianov <borislav@users.sf.net> 6 Borislav Deianov <borislav@users.sf.net>
7 Henrique de Moraes Holschuh <hmh@hmh.eng.br> 7 Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -20,7 +20,8 @@ moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
20kernel 2.6.29 and release 0.22. 20kernel 2.6.29 and release 0.22.
21 21
22The driver is named "thinkpad-acpi". In some places, like module 22The driver is named "thinkpad-acpi". In some places, like module
23names, "thinkpad_acpi" is used because of userspace issues. 23names and log messages, "thinkpad_acpi" is used because of userspace
24issues.
24 25
25"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too 26"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
26long due to length limitations on some Linux kernel versions. 27long due to length limitations on some Linux kernel versions.
@@ -37,7 +38,7 @@ detailed description):
37 - ThinkLight on and off 38 - ThinkLight on and off
38 - limited docking and undocking 39 - limited docking and undocking
39 - UltraBay eject 40 - UltraBay eject
40 - CMOS control 41 - CMOS/UCMS control
41 - LED control 42 - LED control
42 - ACPI sounds 43 - ACPI sounds
43 - temperature sensors 44 - temperature sensors
@@ -46,6 +47,7 @@ detailed description):
46 - Volume control 47 - Volume control
47 - Fan control and monitoring: fan speed, fan enable/disable 48 - Fan control and monitoring: fan speed, fan enable/disable
48 - WAN enable and disable 49 - WAN enable and disable
50 - UWB enable and disable
49 51
50A compatibility table by model and feature is maintained on the web 52A compatibility table by model and feature is maintained on the web
51site, http://ibm-acpi.sf.net/. I appreciate any success or failure 53site, http://ibm-acpi.sf.net/. I appreciate any success or failure
@@ -53,7 +55,7 @@ reports, especially if they add to or correct the compatibility table.
53Please include the following information in your report: 55Please include the following information in your report:
54 56
55 - ThinkPad model name 57 - ThinkPad model name
56 - a copy of your DSDT, from /proc/acpi/dsdt 58 - a copy of your ACPI tables, using the "acpidump" utility
57 - a copy of the output of dmidecode, with serial numbers 59 - a copy of the output of dmidecode, with serial numbers
58 and UUIDs masked off 60 and UUIDs masked off
59 - which driver features work and which don't 61 - which driver features work and which don't
@@ -66,17 +68,18 @@ Installation
66------------ 68------------
67 69
68If you are compiling this driver as included in the Linux kernel 70If you are compiling this driver as included in the Linux kernel
69sources, simply enable the CONFIG_THINKPAD_ACPI option, and optionally 71sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
70enable the CONFIG_THINKPAD_ACPI_BAY option if you want the 72It is located on the menu path: "Device Drivers" -> "X86 Platform
71thinkpad-specific bay functionality. 73Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
74
72 75
73Features 76Features
74-------- 77--------
75 78
76The driver exports two different interfaces to userspace, which can be 79The driver exports two different interfaces to userspace, which can be
77used to access the features it provides. One is a legacy procfs-based 80used to access the features it provides. One is a legacy procfs-based
78interface, which will be removed at some time in the distant future. 81interface, which will be removed at some time in the future. The other
79The other is a new sysfs-based interface which is not complete yet. 82is a new sysfs-based interface which is not complete yet.
80 83
81The procfs interface creates the /proc/acpi/ibm directory. There is a 84The procfs interface creates the /proc/acpi/ibm directory. There is a
82file under that directory for each feature it supports. The procfs 85file under that directory for each feature it supports. The procfs
@@ -111,15 +114,17 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver
111as a driver attribute (see below). 114as a driver attribute (see below).
112 115
113Sysfs driver attributes are on the driver's sysfs attribute space, 116Sysfs driver attributes are on the driver's sysfs attribute space,
114for 2.6.23 this is /sys/bus/platform/drivers/thinkpad_acpi/ and 117for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
115/sys/bus/platform/drivers/thinkpad_hwmon/ 118/sys/bus/platform/drivers/thinkpad_hwmon/
116 119
117Sysfs device attributes are on the thinkpad_acpi device sysfs attribute 120Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
118space, for 2.6.23 this is /sys/devices/platform/thinkpad_acpi/. 121space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
119 122
120Sysfs device attributes for the sensors and fan are on the 123Sysfs device attributes for the sensors and fan are on the
121thinkpad_hwmon device's sysfs attribute space, but you should locate it 124thinkpad_hwmon device's sysfs attribute space, but you should locate it
122looking for a hwmon device with the name attribute of "thinkpad". 125looking for a hwmon device with the name attribute of "thinkpad", or
126better yet, through libsensors.
127
123 128
124Driver version 129Driver version
125-------------- 130--------------
@@ -129,6 +134,7 @@ sysfs driver attribute: version
129 134
130The driver name and version. No commands can be written to this file. 135The driver name and version. No commands can be written to this file.
131 136
137
132Sysfs interface version 138Sysfs interface version
133----------------------- 139-----------------------
134 140
@@ -160,6 +166,7 @@ expect that an attribute might not be there, and deal with it properly
160(an attribute not being there *is* a valid way to make it clear that a 166(an attribute not being there *is* a valid way to make it clear that a
161feature is not available in sysfs). 167feature is not available in sysfs).
162 168
169
163Hot keys 170Hot keys
164-------- 171--------
165 172
@@ -172,17 +179,14 @@ system. Enabling the hotkey functionality of thinkpad-acpi signals the
172firmware that such a driver is present, and modifies how the ThinkPad 179firmware that such a driver is present, and modifies how the ThinkPad
173firmware will behave in many situations. 180firmware will behave in many situations.
174 181
175The driver enables the hot key feature automatically when loaded. The 182The driver enables the HKEY ("hot key") event reporting automatically
176feature can later be disabled and enabled back at runtime. The driver 183when loaded, and disables it when it is removed.
177will also restore the hot key feature to its previous state and mask
178when it is unloaded.
179 184
180When the hotkey feature is enabled and the hot key mask is set (see 185The driver will report HKEY events in the following format:
181below), the driver will report HKEY events in the following format:
182 186
183 ibm/hotkey HKEY 00000080 0000xxxx 187 ibm/hotkey HKEY 00000080 0000xxxx
184 188
185Some of these events refer to hot key presses, but not all. 189Some of these events refer to hot key presses, but not all of them.
186 190
187The driver will generate events over the input layer for hot keys and 191The driver will generate events over the input layer for hot keys and
188radio switches, and over the ACPI netlink layer for other events. The 192radio switches, and over the ACPI netlink layer for other events. The
@@ -214,13 +218,17 @@ procfs notes:
214 218
215The following commands can be written to the /proc/acpi/ibm/hotkey file: 219The following commands can be written to the /proc/acpi/ibm/hotkey file:
216 220
217 echo enable > /proc/acpi/ibm/hotkey -- enable the hot keys feature
218 echo disable > /proc/acpi/ibm/hotkey -- disable the hot keys feature
219 echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys 221 echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
220 echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys 222 echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
221 ... any other 8-hex-digit mask ... 223 ... any other 8-hex-digit mask ...
222 echo reset > /proc/acpi/ibm/hotkey -- restore the original mask 224 echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
223 225
226The following commands have been deprecated and will cause the kernel
227to log a warning:
228
229 echo enable > /proc/acpi/ibm/hotkey -- does nothing
230 echo disable > /proc/acpi/ibm/hotkey -- returns an error
231
224The procfs interface does not support NVRAM polling control. So as to 232The procfs interface does not support NVRAM polling control. So as to
225maintain maximum bug-to-bug compatibility, it does not report any masks, 233maintain maximum bug-to-bug compatibility, it does not report any masks,
226nor does it allow one to manipulate the hot key mask when the firmware 234nor does it allow one to manipulate the hot key mask when the firmware
@@ -229,12 +237,9 @@ does not support masks at all, even if NVRAM polling is in use.
229sysfs notes: 237sysfs notes:
230 238
231 hotkey_bios_enabled: 239 hotkey_bios_enabled:
232 Returns the status of the hot keys feature when 240 DEPRECATED, WILL BE REMOVED SOON.
233 thinkpad-acpi was loaded. Upon module unload, the hot
234 key feature status will be restored to this value.
235 241
236 0: hot keys were disabled 242 Returns 0.
237 1: hot keys were enabled (unusual)
238 243
239 hotkey_bios_mask: 244 hotkey_bios_mask:
240 Returns the hot keys mask when thinkpad-acpi was loaded. 245 Returns the hot keys mask when thinkpad-acpi was loaded.
@@ -242,13 +247,10 @@ sysfs notes:
242 to this value. 247 to this value.
243 248
244 hotkey_enable: 249 hotkey_enable:
245 Enables/disables the hot keys feature in the ACPI 250 DEPRECATED, WILL BE REMOVED SOON.
246 firmware, and reports current status of the hot keys
247 feature. Has no effect on the NVRAM hot key polling
248 functionality.
249 251
250 0: disables the hot keys feature / feature disabled 252 0: returns -EPERM
251 1: enables the hot keys feature / feature enabled 253 1: does nothing
252 254
253 hotkey_mask: 255 hotkey_mask:
254 bit mask to enable driver-handling (and depending on 256 bit mask to enable driver-handling (and depending on
@@ -504,7 +506,7 @@ generate input device EV_KEY events.
504In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW 506In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
505events for switches: 507events for switches:
506 508
507SW_RFKILL_ALL T60 and later hardare rfkill rocker switch 509SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
508SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A 510SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
509 511
510Non hot-key ACPI HKEY event map: 512Non hot-key ACPI HKEY event map:
@@ -618,6 +620,7 @@ For Lenovo models *with* ACPI backlight control:
618 and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process 620 and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process
619 these keys on userspace somehow (e.g. by calling xbacklight). 621 these keys on userspace somehow (e.g. by calling xbacklight).
620 622
623
621Bluetooth 624Bluetooth
622--------- 625---------
623 626
@@ -628,6 +631,9 @@ sysfs rfkill class: switch "tpacpi_bluetooth_sw"
628This feature shows the presence and current state of a ThinkPad 631This feature shows the presence and current state of a ThinkPad
629Bluetooth device in the internal ThinkPad CDC slot. 632Bluetooth device in the internal ThinkPad CDC slot.
630 633
634If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
635so it is kept across reboots and power-off.
636
631Procfs notes: 637Procfs notes:
632 638
633If Bluetooth is installed, the following commands can be used: 639If Bluetooth is installed, the following commands can be used:
@@ -652,6 +658,7 @@ Sysfs notes:
652 rfkill controller switch "tpacpi_bluetooth_sw": refer to 658 rfkill controller switch "tpacpi_bluetooth_sw": refer to
653 Documentation/rfkill.txt for details. 659 Documentation/rfkill.txt for details.
654 660
661
655Video output control -- /proc/acpi/ibm/video 662Video output control -- /proc/acpi/ibm/video
656-------------------------------------------- 663--------------------------------------------
657 664
@@ -693,11 +700,8 @@ Fn-F7 from working. This also disables the video output switching
693features of this driver, as it uses the same ACPI methods as 700features of this driver, as it uses the same ACPI methods as
694Fn-F7. Video switching on the console should still work. 701Fn-F7. Video switching on the console should still work.
695 702
696UPDATE: There's now a patch for the X.org Radeon driver which 703UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
697addresses this issue. Some people are reporting success with the patch
698while others are still having problems. For more information:
699 704
700https://bugs.freedesktop.org/show_bug.cgi?id=2000
701 705
702ThinkLight control 706ThinkLight control
703------------------ 707------------------
@@ -720,10 +724,11 @@ The ThinkLight sysfs interface is documented by the LED class
720documentation, in Documentation/leds-class.txt. The ThinkLight LED name 724documentation, in Documentation/leds-class.txt. The ThinkLight LED name
721is "tpacpi::thinklight". 725is "tpacpi::thinklight".
722 726
723Due to limitations in the sysfs LED class, if the status of the thinklight 727Due to limitations in the sysfs LED class, if the status of the ThinkLight
724cannot be read or if it is unknown, thinkpad-acpi will report it as "off". 728cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
725It is impossible to know if the status returned through sysfs is valid. 729It is impossible to know if the status returned through sysfs is valid.
726 730
731
727Docking / undocking -- /proc/acpi/ibm/dock 732Docking / undocking -- /proc/acpi/ibm/dock
728------------------------------------------ 733------------------------------------------
729 734
@@ -784,6 +789,7 @@ the only docking stations currently supported are the X-series
784UltraBase docks and "dumb" port replicators like the Mini Dock (the 789UltraBase docks and "dumb" port replicators like the Mini Dock (the
785latter don't need any ACPI support, actually). 790latter don't need any ACPI support, actually).
786 791
792
787UltraBay eject -- /proc/acpi/ibm/bay 793UltraBay eject -- /proc/acpi/ibm/bay
788------------------------------------ 794------------------------------------
789 795
@@ -847,8 +853,9 @@ supported. Use "eject2" instead of "eject" for the second bay.
847Note: the UltraBay eject support on the 600e/x, A22p and A3x is 853Note: the UltraBay eject support on the 600e/x, A22p and A3x is
848EXPERIMENTAL and may not work as expected. USE WITH CAUTION! 854EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
849 855
850CMOS control 856
851------------ 857CMOS/UCMS control
858-----------------
852 859
853procfs: /proc/acpi/ibm/cmos 860procfs: /proc/acpi/ibm/cmos
854sysfs device attribute: cmos_command 861sysfs device attribute: cmos_command
@@ -882,6 +889,7 @@ The cmos command interface is prone to firmware split-brain problems, as
882in newer ThinkPads it is just a compatibility layer. Do not use it, it is 889in newer ThinkPads it is just a compatibility layer. Do not use it, it is
883exported just as a debug tool. 890exported just as a debug tool.
884 891
892
885LED control 893LED control
886----------- 894-----------
887 895
@@ -893,6 +901,17 @@ some older ThinkPad models, it is possible to query the status of the
893LED indicators as well. Newer ThinkPads cannot query the real status 901LED indicators as well. Newer ThinkPads cannot query the real status
894of the LED indicators. 902of the LED indicators.
895 903
904Because misuse of the LEDs could induce an unaware user to perform
905dangerous actions (like undocking or ejecting a bay device while the
906buses are still active), or mask an important alarm (such as a nearly
907empty battery, or a broken battery), access to most LEDs is
908restricted.
909
910Unrestricted access to all LEDs requires that thinkpad-acpi be
911compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
912Distributions must never enable this option. Individual users that
913are aware of the consequences are welcome to enabling it.
914
896procfs notes: 915procfs notes:
897 916
898The available commands are: 917The available commands are:
@@ -939,6 +958,7 @@ ThinkPad indicator LED should blink in hardware accelerated mode, use the
939"timer" trigger, and leave the delay_on and delay_off parameters set to 958"timer" trigger, and leave the delay_on and delay_off parameters set to
940zero (to request hardware acceleration autodetection). 959zero (to request hardware acceleration autodetection).
941 960
961
942ACPI sounds -- /proc/acpi/ibm/beep 962ACPI sounds -- /proc/acpi/ibm/beep
943---------------------------------- 963----------------------------------
944 964
@@ -968,6 +988,7 @@ X40:
968 16 - one medium-pitched beep repeating constantly, stop with 17 988 16 - one medium-pitched beep repeating constantly, stop with 17
969 17 - stop 16 989 17 - stop 16
970 990
991
971Temperature sensors 992Temperature sensors
972------------------- 993-------------------
973 994
@@ -1115,6 +1136,7 @@ registers contain the current battery capacity, etc. If you experiment
1115with this, do send me your results (including some complete dumps with 1136with this, do send me your results (including some complete dumps with
1116a description of the conditions when they were taken.) 1137a description of the conditions when they were taken.)
1117 1138
1139
1118LCD brightness control 1140LCD brightness control
1119---------------------- 1141----------------------
1120 1142
@@ -1124,10 +1146,9 @@ sysfs backlight device "thinkpad_screen"
1124This feature allows software control of the LCD brightness on ThinkPad 1146This feature allows software control of the LCD brightness on ThinkPad
1125models which don't have a hardware brightness slider. 1147models which don't have a hardware brightness slider.
1126 1148
1127It has some limitations: the LCD backlight cannot be actually turned on or 1149It has some limitations: the LCD backlight cannot be actually turned
1128off by this interface, and in many ThinkPad models, the "dim while on 1150on or off by this interface, it just controls the backlight brightness
1129battery" functionality will be enabled by the BIOS when this interface is 1151level.
1130used, and cannot be controlled.
1131 1152
1132On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control 1153On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
1133has eight brightness levels, ranging from 0 to 7. Some of the levels 1154has eight brightness levels, ranging from 0 to 7. Some of the levels
@@ -1136,10 +1157,15 @@ display backlight brightness control methods have 16 levels, ranging
1136from 0 to 15. 1157from 0 to 15.
1137 1158
1138There are two interfaces to the firmware for direct brightness control, 1159There are two interfaces to the firmware for direct brightness control,
1139EC and CMOS. To select which one should be used, use the 1160EC and UCMS (or CMOS). To select which one should be used, use the
1140brightness_mode module parameter: brightness_mode=1 selects EC mode, 1161brightness_mode module parameter: brightness_mode=1 selects EC mode,
1141brightness_mode=2 selects CMOS mode, brightness_mode=3 selects both EC 1162brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
1142and CMOS. The driver tries to auto-detect which interface to use. 1163mode with NVRAM backing (so that brightness changes are remembered
1164across shutdown/reboot).
1165
1166The driver tries to select which interface to use from a table of
1167defaults for each ThinkPad model. If it makes a wrong choice, please
1168report this as a bug, so that we can fix it.
1143 1169
1144When display backlight brightness controls are available through the 1170When display backlight brightness controls are available through the
1145standard ACPI interface, it is best to use it instead of this direct 1171standard ACPI interface, it is best to use it instead of this direct
@@ -1201,6 +1227,7 @@ WARNING:
1201 and maybe reduce the life of the backlight lamps by needlessly kicking 1227 and maybe reduce the life of the backlight lamps by needlessly kicking
1202 its level up and down at every change. 1228 its level up and down at every change.
1203 1229
1230
1204Volume control -- /proc/acpi/ibm/volume 1231Volume control -- /proc/acpi/ibm/volume
1205--------------------------------------- 1232---------------------------------------
1206 1233
@@ -1217,6 +1244,11 @@ distinct. The unmute the volume after the mute command, use either the
1217up or down command (the level command will not unmute the volume). 1244up or down command (the level command will not unmute the volume).
1218The current volume level and mute state is shown in the file. 1245The current volume level and mute state is shown in the file.
1219 1246
1247The ALSA mixer interface to this feature is still missing, but patches
1248to add it exist. That problem should be addressed in the not so
1249distant future.
1250
1251
1220Fan control and monitoring: fan speed, fan enable/disable 1252Fan control and monitoring: fan speed, fan enable/disable
1221--------------------------------------------------------- 1253---------------------------------------------------------
1222 1254
@@ -1383,8 +1415,11 @@ procfs: /proc/acpi/ibm/wan
1383sysfs device attribute: wwan_enable (deprecated) 1415sysfs device attribute: wwan_enable (deprecated)
1384sysfs rfkill class: switch "tpacpi_wwan_sw" 1416sysfs rfkill class: switch "tpacpi_wwan_sw"
1385 1417
1386This feature shows the presence and current state of a W-WAN (Sierra 1418This feature shows the presence and current state of the built-in
1387Wireless EV-DO) device. 1419Wireless WAN device.
1420
1421If the ThinkPad supports it, the WWAN state is stored in NVRAM,
1422so it is kept across reboots and power-off.
1388 1423
1389It was tested on a Lenovo ThinkPad X60. It should probably work on other 1424It was tested on a Lenovo ThinkPad X60. It should probably work on other
1390ThinkPad models which come with this module installed. 1425ThinkPad models which come with this module installed.
@@ -1413,6 +1448,7 @@ Sysfs notes:
1413 rfkill controller switch "tpacpi_wwan_sw": refer to 1448 rfkill controller switch "tpacpi_wwan_sw": refer to
1414 Documentation/rfkill.txt for details. 1449 Documentation/rfkill.txt for details.
1415 1450
1451
1416EXPERIMENTAL: UWB 1452EXPERIMENTAL: UWB
1417----------------- 1453-----------------
1418 1454
@@ -1431,6 +1467,7 @@ Sysfs notes:
1431 rfkill controller switch "tpacpi_uwb_sw": refer to 1467 rfkill controller switch "tpacpi_uwb_sw": refer to
1432 Documentation/rfkill.txt for details. 1468 Documentation/rfkill.txt for details.
1433 1469
1470
1434Multiple Commands, Module Parameters 1471Multiple Commands, Module Parameters
1435------------------------------------ 1472------------------------------------
1436 1473
@@ -1445,6 +1482,7 @@ for example:
1445 1482
1446 modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable 1483 modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
1447 1484
1485
1448Enabling debugging output 1486Enabling debugging output
1449------------------------- 1487-------------------------
1450 1488
@@ -1457,8 +1495,15 @@ will enable all debugging output classes. It takes a bitmask, so
1457to enable more than one output class, just add their values. 1495to enable more than one output class, just add their values.
1458 1496
1459 Debug bitmask Description 1497 Debug bitmask Description
1498 0x8000 Disclose PID of userspace programs
1499 accessing some functions of the driver
1460 0x0001 Initialization and probing 1500 0x0001 Initialization and probing
1461 0x0002 Removal 1501 0x0002 Removal
1502 0x0004 RF Transmitter control (RFKILL)
1503 (bluetooth, WWAN, UWB...)
1504 0x0008 HKEY event interface, hotkeys
1505 0x0010 Fan control
1506 0x0020 Backlight brightness
1462 1507
1463There is also a kernel build option to enable more debugging 1508There is also a kernel build option to enable more debugging
1464information, which may be necessary to debug driver problems. 1509information, which may be necessary to debug driver problems.
@@ -1467,6 +1512,7 @@ The level of debugging information output by the driver can be changed
1467at runtime through sysfs, using the driver attribute debug_level. The 1512at runtime through sysfs, using the driver attribute debug_level. The
1468attribute takes the same bitmask as the debug module parameter above. 1513attribute takes the same bitmask as the debug module parameter above.
1469 1514
1515
1470Force loading of module 1516Force loading of module
1471----------------------- 1517-----------------------
1472 1518
@@ -1505,3 +1551,7 @@ Sysfs interface changelog:
1505 1551
15060x020200: Add poll()/select() support to the following attributes: 15520x020200: Add poll()/select() support to the following attributes:
1507 hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason 1553 hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
1554
15550x020300: hotkey enable/disable support removed, attributes
1556 hotkey_bios_enabled and hotkey_enable deprecated and
1557 marked for removal.
diff --git a/Documentation/lguest/.gitignore b/Documentation/lguest/.gitignore
new file mode 100644
index 000000000000..115587fd5f65
--- /dev/null
+++ b/Documentation/lguest/.gitignore
@@ -0,0 +1 @@
lguest
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 1f4f9e888bd1..28c8cdfcafd8 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,6 +1,5 @@
1# This creates the demonstration utility "lguest" which runs a Linux guest. 1# This creates the demonstration utility "lguest" which runs a Linux guest.
2CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE 2CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
3LDLIBS:=-lz
4 3
5all: lguest 4all: lguest
6 5
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index f2dbbf3bdeab..9ebcd6ef361b 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -16,6 +16,7 @@
16#include <sys/types.h> 16#include <sys/types.h>
17#include <sys/stat.h> 17#include <sys/stat.h>
18#include <sys/wait.h> 18#include <sys/wait.h>
19#include <sys/eventfd.h>
19#include <fcntl.h> 20#include <fcntl.h>
20#include <stdbool.h> 21#include <stdbool.h>
21#include <errno.h> 22#include <errno.h>
@@ -59,7 +60,6 @@ typedef uint8_t u8;
59/*:*/ 60/*:*/
60 61
61#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ 62#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
62#define NET_PEERNUM 1
63#define BRIDGE_PFX "bridge:" 63#define BRIDGE_PFX "bridge:"
64#ifndef SIOCBRADDIF 64#ifndef SIOCBRADDIF
65#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ 65#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
@@ -76,19 +76,12 @@ static bool verbose;
76 do { if (verbose) printf(args); } while(0) 76 do { if (verbose) printf(args); } while(0)
77/*:*/ 77/*:*/
78 78
79/* File descriptors for the Waker. */
80struct {
81 int pipe[2];
82 int lguest_fd;
83} waker_fds;
84
85/* The pointer to the start of guest memory. */ 79/* The pointer to the start of guest memory. */
86static void *guest_base; 80static void *guest_base;
87/* The maximum guest physical address allowed, and maximum possible. */ 81/* The maximum guest physical address allowed, and maximum possible. */
88static unsigned long guest_limit, guest_max; 82static unsigned long guest_limit, guest_max;
89/* The pipe for signal hander to write to. */ 83/* The /dev/lguest file descriptor. */
90static int timeoutpipe[2]; 84static int lguest_fd;
91static unsigned int timeout_usec = 500;
92 85
93/* a per-cpu variable indicating whose vcpu is currently running */ 86/* a per-cpu variable indicating whose vcpu is currently running */
94static unsigned int __thread cpu_id; 87static unsigned int __thread cpu_id;
@@ -96,11 +89,6 @@ static unsigned int __thread cpu_id;
96/* This is our list of devices. */ 89/* This is our list of devices. */
97struct device_list 90struct device_list
98{ 91{
99 /* Summary information about the devices in our list: ready to pass to
100 * select() to ask which need servicing.*/
101 fd_set infds;
102 int max_infd;
103
104 /* Counter to assign interrupt numbers. */ 92 /* Counter to assign interrupt numbers. */
105 unsigned int next_irq; 93 unsigned int next_irq;
106 94
@@ -126,22 +114,21 @@ struct device
126 /* The linked-list pointer. */ 114 /* The linked-list pointer. */
127 struct device *next; 115 struct device *next;
128 116
129 /* The this device's descriptor, as mapped into the Guest. */ 117 /* The device's descriptor, as mapped into the Guest. */
130 struct lguest_device_desc *desc; 118 struct lguest_device_desc *desc;
131 119
120 /* We can't trust desc values once Guest has booted: we use these. */
121 unsigned int feature_len;
122 unsigned int num_vq;
123
132 /* The name of this device, for --verbose. */ 124 /* The name of this device, for --verbose. */
133 const char *name; 125 const char *name;
134 126
135 /* If handle_input is set, it wants to be called when this file
136 * descriptor is ready. */
137 int fd;
138 bool (*handle_input)(int fd, struct device *me);
139
140 /* Any queues attached to this device */ 127 /* Any queues attached to this device */
141 struct virtqueue *vq; 128 struct virtqueue *vq;
142 129
143 /* Handle status being finalized (ie. feature bits stable). */ 130 /* Is it operational */
144 void (*ready)(struct device *me); 131 bool running;
145 132
146 /* Device-specific data. */ 133 /* Device-specific data. */
147 void *priv; 134 void *priv;
@@ -164,22 +151,28 @@ struct virtqueue
164 /* Last available index we saw. */ 151 /* Last available index we saw. */
165 u16 last_avail_idx; 152 u16 last_avail_idx;
166 153
167 /* The routine to call when the Guest pings us, or timeout. */ 154 /* How many are used since we sent last irq? */
168 void (*handle_output)(int fd, struct virtqueue *me, bool timeout); 155 unsigned int pending_used;
169 156
170 /* Outstanding buffers */ 157 /* Eventfd where Guest notifications arrive. */
171 unsigned int inflight; 158 int eventfd;
172 159
173 /* Is this blocked awaiting a timer? */ 160 /* Function for the thread which is servicing this virtqueue. */
174 bool blocked; 161 void (*service)(struct virtqueue *vq);
162 pid_t thread;
175}; 163};
176 164
177/* Remember the arguments to the program so we can "reboot" */ 165/* Remember the arguments to the program so we can "reboot" */
178static char **main_args; 166static char **main_args;
179 167
180/* Since guest is UP and we don't run at the same time, we don't need barriers. 168/* The original tty settings to restore on exit. */
181 * But I include them in the code in case others copy it. */ 169static struct termios orig_term;
182#define wmb() 170
171/* We have to be careful with barriers: our devices are all run in separate
172 * threads and so we need to make sure that changes visible to the Guest happen
173 * in precise order. */
174#define wmb() __asm__ __volatile__("" : : : "memory")
175#define mb() __asm__ __volatile__("" : : : "memory")
183 176
184/* Convert an iovec element to the given type. 177/* Convert an iovec element to the given type.
185 * 178 *
@@ -245,7 +238,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
245static u8 *get_feature_bits(struct device *dev) 238static u8 *get_feature_bits(struct device *dev)
246{ 239{
247 return (u8 *)(dev->desc + 1) 240 return (u8 *)(dev->desc + 1)
248 + dev->desc->num_vq * sizeof(struct lguest_vqconfig); 241 + dev->num_vq * sizeof(struct lguest_vqconfig);
249} 242}
250 243
251/*L:100 The Launcher code itself takes us out into userspace, that scary place 244/*L:100 The Launcher code itself takes us out into userspace, that scary place
@@ -505,99 +498,19 @@ static void concat(char *dst, char *args[])
505 * saw the arguments it expects when we looked at initialize() in lguest_user.c: 498 * saw the arguments it expects when we looked at initialize() in lguest_user.c:
506 * the base of Guest "physical" memory, the top physical page to allow and the 499 * the base of Guest "physical" memory, the top physical page to allow and the
507 * entry point for the Guest. */ 500 * entry point for the Guest. */
508static int tell_kernel(unsigned long start) 501static void tell_kernel(unsigned long start)
509{ 502{
510 unsigned long args[] = { LHREQ_INITIALIZE, 503 unsigned long args[] = { LHREQ_INITIALIZE,
511 (unsigned long)guest_base, 504 (unsigned long)guest_base,
512 guest_limit / getpagesize(), start }; 505 guest_limit / getpagesize(), start };
513 int fd;
514
515 verbose("Guest: %p - %p (%#lx)\n", 506 verbose("Guest: %p - %p (%#lx)\n",
516 guest_base, guest_base + guest_limit, guest_limit); 507 guest_base, guest_base + guest_limit, guest_limit);
517 fd = open_or_die("/dev/lguest", O_RDWR); 508 lguest_fd = open_or_die("/dev/lguest", O_RDWR);
518 if (write(fd, args, sizeof(args)) < 0) 509 if (write(lguest_fd, args, sizeof(args)) < 0)
519 err(1, "Writing to /dev/lguest"); 510 err(1, "Writing to /dev/lguest");
520
521 /* We return the /dev/lguest file descriptor to control this Guest */
522 return fd;
523} 511}
524/*:*/ 512/*:*/
525 513
526static void add_device_fd(int fd)
527{
528 FD_SET(fd, &devices.infds);
529 if (fd > devices.max_infd)
530 devices.max_infd = fd;
531}
532
533/*L:200
534 * The Waker.
535 *
536 * With console, block and network devices, we can have lots of input which we
537 * need to process. We could try to tell the kernel what file descriptors to
538 * watch, but handing a file descriptor mask through to the kernel is fairly
539 * icky.
540 *
541 * Instead, we clone off a thread which watches the file descriptors and writes
542 * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
543 * stop running the Guest. This causes the Launcher to return from the
544 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
545 * the LHREQ_BREAK and wake us up again.
546 *
547 * This, of course, is merely a different *kind* of icky.
548 *
549 * Given my well-known antipathy to threads, I'd prefer to use processes. But
550 * it's easier to share Guest memory with threads, and trivial to share the
551 * devices.infds as the Launcher changes it.
552 */
553static int waker(void *unused)
554{
555 /* Close the write end of the pipe: only the Launcher has it open. */
556 close(waker_fds.pipe[1]);
557
558 for (;;) {
559 fd_set rfds = devices.infds;
560 unsigned long args[] = { LHREQ_BREAK, 1 };
561 unsigned int maxfd = devices.max_infd;
562
563 /* We also listen to the pipe from the Launcher. */
564 FD_SET(waker_fds.pipe[0], &rfds);
565 if (waker_fds.pipe[0] > maxfd)
566 maxfd = waker_fds.pipe[0];
567
568 /* Wait until input is ready from one of the devices. */
569 select(maxfd+1, &rfds, NULL, NULL, NULL);
570
571 /* Message from Launcher? */
572 if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
573 char c;
574 /* If this fails, then assume Launcher has exited.
575 * Don't do anything on exit: we're just a thread! */
576 if (read(waker_fds.pipe[0], &c, 1) != 1)
577 _exit(0);
578 continue;
579 }
580
581 /* Send LHREQ_BREAK command to snap the Launcher out of it. */
582 pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
583 }
584 return 0;
585}
586
587/* This routine just sets up a pipe to the Waker process. */
588static void setup_waker(int lguest_fd)
589{
590 /* This pipe is closed when Launcher dies, telling Waker. */
591 if (pipe(waker_fds.pipe) != 0)
592 err(1, "Creating pipe for Waker");
593
594 /* Waker also needs to know the lguest fd */
595 waker_fds.lguest_fd = lguest_fd;
596
597 if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
598 err(1, "Creating Waker");
599}
600
601/* 514/*
602 * Device Handling. 515 * Device Handling.
603 * 516 *
@@ -623,49 +536,90 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
623/* Each buffer in the virtqueues is actually a chain of descriptors. This 536/* Each buffer in the virtqueues is actually a chain of descriptors. This
624 * function returns the next descriptor in the chain, or vq->vring.num if we're 537 * function returns the next descriptor in the chain, or vq->vring.num if we're
625 * at the end. */ 538 * at the end. */
626static unsigned next_desc(struct virtqueue *vq, unsigned int i) 539static unsigned next_desc(struct vring_desc *desc,
540 unsigned int i, unsigned int max)
627{ 541{
628 unsigned int next; 542 unsigned int next;
629 543
630 /* If this descriptor says it doesn't chain, we're done. */ 544 /* If this descriptor says it doesn't chain, we're done. */
631 if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT)) 545 if (!(desc[i].flags & VRING_DESC_F_NEXT))
632 return vq->vring.num; 546 return max;
633 547
634 /* Check they're not leading us off end of descriptors. */ 548 /* Check they're not leading us off end of descriptors. */
635 next = vq->vring.desc[i].next; 549 next = desc[i].next;
636 /* Make sure compiler knows to grab that: we don't want it changing! */ 550 /* Make sure compiler knows to grab that: we don't want it changing! */
637 wmb(); 551 wmb();
638 552
639 if (next >= vq->vring.num) 553 if (next >= max)
640 errx(1, "Desc next is %u", next); 554 errx(1, "Desc next is %u", next);
641 555
642 return next; 556 return next;
643} 557}
644 558
559/* This actually sends the interrupt for this virtqueue */
560static void trigger_irq(struct virtqueue *vq)
561{
562 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
563
564 /* Don't inform them if nothing used. */
565 if (!vq->pending_used)
566 return;
567 vq->pending_used = 0;
568
569 /* If they don't want an interrupt, don't send one, unless empty. */
570 if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
571 && lg_last_avail(vq) != vq->vring.avail->idx)
572 return;
573
574 /* Send the Guest an interrupt tell them we used something up. */
575 if (write(lguest_fd, buf, sizeof(buf)) != 0)
576 err(1, "Triggering irq %i", vq->config.irq);
577}
578
645/* This looks in the virtqueue and for the first available buffer, and converts 579/* This looks in the virtqueue and for the first available buffer, and converts
646 * it to an iovec for convenient access. Since descriptors consist of some 580 * it to an iovec for convenient access. Since descriptors consist of some
647 * number of output then some number of input descriptors, it's actually two 581 * number of output then some number of input descriptors, it's actually two
648 * iovecs, but we pack them into one and note how many of each there were. 582 * iovecs, but we pack them into one and note how many of each there were.
649 * 583 *
650 * This function returns the descriptor number found, or vq->vring.num (which 584 * This function returns the descriptor number found. */
651 * is never a valid descriptor number) if none was found. */ 585static unsigned wait_for_vq_desc(struct virtqueue *vq,
652static unsigned get_vq_desc(struct virtqueue *vq, 586 struct iovec iov[],
653 struct iovec iov[], 587 unsigned int *out_num, unsigned int *in_num)
654 unsigned int *out_num, unsigned int *in_num)
655{ 588{
656 unsigned int i, head; 589 unsigned int i, head, max;
657 u16 last_avail; 590 struct vring_desc *desc;
591 u16 last_avail = lg_last_avail(vq);
592
593 while (last_avail == vq->vring.avail->idx) {
594 u64 event;
595
596 /* OK, tell Guest about progress up to now. */
597 trigger_irq(vq);
598
599 /* OK, now we need to know about added descriptors. */
600 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
601
602 /* They could have slipped one in as we were doing that: make
603 * sure it's written, then check again. */
604 mb();
605 if (last_avail != vq->vring.avail->idx) {
606 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
607 break;
608 }
609
610 /* Nothing new? Wait for eventfd to tell us they refilled. */
611 if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
612 errx(1, "Event read failed?");
613
614 /* We don't need to be notified again. */
615 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
616 }
658 617
659 /* Check it isn't doing very strange things with descriptor numbers. */ 618 /* Check it isn't doing very strange things with descriptor numbers. */
660 last_avail = lg_last_avail(vq);
661 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) 619 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
662 errx(1, "Guest moved used index from %u to %u", 620 errx(1, "Guest moved used index from %u to %u",
663 last_avail, vq->vring.avail->idx); 621 last_avail, vq->vring.avail->idx);
664 622
665 /* If there's nothing new since last we looked, return invalid. */
666 if (vq->vring.avail->idx == last_avail)
667 return vq->vring.num;
668
669 /* Grab the next descriptor number they're advertising, and increment 623 /* Grab the next descriptor number they're advertising, and increment
670 * the index we've seen. */ 624 * the index we've seen. */
671 head = vq->vring.avail->ring[last_avail % vq->vring.num]; 625 head = vq->vring.avail->ring[last_avail % vq->vring.num];
@@ -678,15 +632,28 @@ static unsigned get_vq_desc(struct virtqueue *vq,
678 /* When we start there are none of either input nor output. */ 632 /* When we start there are none of either input nor output. */
679 *out_num = *in_num = 0; 633 *out_num = *in_num = 0;
680 634
635 max = vq->vring.num;
636 desc = vq->vring.desc;
681 i = head; 637 i = head;
638
639 /* If this is an indirect entry, then this buffer contains a descriptor
640 * table which we handle as if it's any normal descriptor chain. */
641 if (desc[i].flags & VRING_DESC_F_INDIRECT) {
642 if (desc[i].len % sizeof(struct vring_desc))
643 errx(1, "Invalid size for indirect buffer table");
644
645 max = desc[i].len / sizeof(struct vring_desc);
646 desc = check_pointer(desc[i].addr, desc[i].len);
647 i = 0;
648 }
649
682 do { 650 do {
683 /* Grab the first descriptor, and check it's OK. */ 651 /* Grab the first descriptor, and check it's OK. */
684 iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len; 652 iov[*out_num + *in_num].iov_len = desc[i].len;
685 iov[*out_num + *in_num].iov_base 653 iov[*out_num + *in_num].iov_base
686 = check_pointer(vq->vring.desc[i].addr, 654 = check_pointer(desc[i].addr, desc[i].len);
687 vq->vring.desc[i].len);
688 /* If this is an input descriptor, increment that count. */ 655 /* If this is an input descriptor, increment that count. */
689 if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE) 656 if (desc[i].flags & VRING_DESC_F_WRITE)
690 (*in_num)++; 657 (*in_num)++;
691 else { 658 else {
692 /* If it's an output descriptor, they're all supposed 659 /* If it's an output descriptor, they're all supposed
@@ -697,11 +664,10 @@ static unsigned get_vq_desc(struct virtqueue *vq,
697 } 664 }
698 665
699 /* If we've got too many, that implies a descriptor loop. */ 666 /* If we've got too many, that implies a descriptor loop. */
700 if (*out_num + *in_num > vq->vring.num) 667 if (*out_num + *in_num > max)
701 errx(1, "Looped descriptor"); 668 errx(1, "Looped descriptor");
702 } while ((i = next_desc(vq, i)) != vq->vring.num); 669 } while ((i = next_desc(desc, i, max)) != max);
703 670
704 vq->inflight++;
705 return head; 671 return head;
706} 672}
707 673
@@ -719,44 +685,20 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
719 /* Make sure buffer is written before we update index. */ 685 /* Make sure buffer is written before we update index. */
720 wmb(); 686 wmb();
721 vq->vring.used->idx++; 687 vq->vring.used->idx++;
722 vq->inflight--; 688 vq->pending_used++;
723}
724
725/* This actually sends the interrupt for this virtqueue */
726static void trigger_irq(int fd, struct virtqueue *vq)
727{
728 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
729
730 /* If they don't want an interrupt, don't send one, unless empty. */
731 if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
732 && vq->inflight)
733 return;
734
735 /* Send the Guest an interrupt tell them we used something up. */
736 if (write(fd, buf, sizeof(buf)) != 0)
737 err(1, "Triggering irq %i", vq->config.irq);
738} 689}
739 690
740/* And here's the combo meal deal. Supersize me! */ 691/* And here's the combo meal deal. Supersize me! */
741static void add_used_and_trigger(int fd, struct virtqueue *vq, 692static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
742 unsigned int head, int len)
743{ 693{
744 add_used(vq, head, len); 694 add_used(vq, head, len);
745 trigger_irq(fd, vq); 695 trigger_irq(vq);
746} 696}
747 697
748/* 698/*
749 * The Console 699 * The Console
750 * 700 *
751 * Here is the input terminal setting we save, and the routine to restore them 701 * We associate some data with the console for our exit hack. */
752 * on exit so the user gets their terminal back. */
753static struct termios orig_term;
754static void restore_term(void)
755{
756 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
757}
758
759/* We associate some data with the console for our exit hack. */
760struct console_abort 702struct console_abort
761{ 703{
762 /* How many times have they hit ^C? */ 704 /* How many times have they hit ^C? */
@@ -766,276 +708,275 @@ struct console_abort
766}; 708};
767 709
768/* This is the routine which handles console input (ie. stdin). */ 710/* This is the routine which handles console input (ie. stdin). */
769static bool handle_console_input(int fd, struct device *dev) 711static void console_input(struct virtqueue *vq)
770{ 712{
771 int len; 713 int len;
772 unsigned int head, in_num, out_num; 714 unsigned int head, in_num, out_num;
773 struct iovec iov[dev->vq->vring.num]; 715 struct console_abort *abort = vq->dev->priv;
774 struct console_abort *abort = dev->priv; 716 struct iovec iov[vq->vring.num];
775
776 /* First we need a console buffer from the Guests's input virtqueue. */
777 head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
778
779 /* If they're not ready for input, stop listening to this file
780 * descriptor. We'll start again once they add an input buffer. */
781 if (head == dev->vq->vring.num)
782 return false;
783 717
718 /* Make sure there's a descriptor waiting. */
719 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
784 if (out_num) 720 if (out_num)
785 errx(1, "Output buffers in console in queue?"); 721 errx(1, "Output buffers in console in queue?");
786 722
787 /* This is why we convert to iovecs: the readv() call uses them, and so 723 /* Read it in. */
788 * it reads straight into the Guest's buffer. */ 724 len = readv(STDIN_FILENO, iov, in_num);
789 len = readv(dev->fd, iov, in_num);
790 if (len <= 0) { 725 if (len <= 0) {
791 /* This implies that the console is closed, is /dev/null, or 726 /* Ran out of input? */
792 * something went terribly wrong. */
793 warnx("Failed to get console input, ignoring console."); 727 warnx("Failed to get console input, ignoring console.");
794 /* Put the input terminal back. */ 728 /* For simplicity, dying threads kill the whole Launcher. So
795 restore_term(); 729 * just nap here. */
796 /* Remove callback from input vq, so it doesn't restart us. */ 730 for (;;)
797 dev->vq->handle_output = NULL; 731 pause();
798 /* Stop listening to this fd: don't call us again. */
799 return false;
800 } 732 }
801 733
802 /* Tell the Guest about the new input. */ 734 add_used_and_trigger(vq, head, len);
803 add_used_and_trigger(fd, dev->vq, head, len);
804 735
805 /* Three ^C within one second? Exit. 736 /* Three ^C within one second? Exit.
806 * 737 *
807 * This is such a hack, but works surprisingly well. Each ^C has to be 738 * This is such a hack, but works surprisingly well. Each ^C has to
808 * in a buffer by itself, so they can't be too fast. But we check that 739 * be in a buffer by itself, so they can't be too fast. But we check
809 * we get three within about a second, so they can't be too slow. */ 740 * that we get three within about a second, so they can't be too
810 if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { 741 * slow. */
811 if (!abort->count++) 742 if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
812 gettimeofday(&abort->start, NULL);
813 else if (abort->count == 3) {
814 struct timeval now;
815 gettimeofday(&now, NULL);
816 if (now.tv_sec <= abort->start.tv_sec+1) {
817 unsigned long args[] = { LHREQ_BREAK, 0 };
818 /* Close the fd so Waker will know it has to
819 * exit. */
820 close(waker_fds.pipe[1]);
821 /* Just in case Waker is blocked in BREAK, send
822 * unbreak now. */
823 write(fd, args, sizeof(args));
824 exit(2);
825 }
826 abort->count = 0;
827 }
828 } else
829 /* Any other key resets the abort counter. */
830 abort->count = 0; 743 abort->count = 0;
744 return;
745 }
831 746
832 /* Everything went OK! */ 747 abort->count++;
833 return true; 748 if (abort->count == 1)
749 gettimeofday(&abort->start, NULL);
750 else if (abort->count == 3) {
751 struct timeval now;
752 gettimeofday(&now, NULL);
753 /* Kill all Launcher processes with SIGINT, like normal ^C */
754 if (now.tv_sec <= abort->start.tv_sec+1)
755 kill(0, SIGINT);
756 abort->count = 0;
757 }
834} 758}
835 759
836/* Handling output for console is simple: we just get all the output buffers 760/* This is the routine which handles console output (ie. stdout). */
837 * and write them to stdout. */ 761static void console_output(struct virtqueue *vq)
838static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
839{ 762{
840 unsigned int head, out, in; 763 unsigned int head, out, in;
841 int len;
842 struct iovec iov[vq->vring.num]; 764 struct iovec iov[vq->vring.num];
843 765
844 /* Keep getting output buffers from the Guest until we run out. */ 766 head = wait_for_vq_desc(vq, iov, &out, &in);
845 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { 767 if (in)
846 if (in) 768 errx(1, "Input buffers in console output queue?");
847 errx(1, "Input buffers in output queue?"); 769 while (!iov_empty(iov, out)) {
848 len = writev(STDOUT_FILENO, iov, out); 770 int len = writev(STDOUT_FILENO, iov, out);
849 add_used_and_trigger(fd, vq, head, len); 771 if (len <= 0)
772 err(1, "Write to stdout gave %i", len);
773 iov_consume(iov, out, len);
850 } 774 }
851} 775 add_used(vq, head, 0);
852
853/* This is called when we no longer want to hear about Guest changes to a
854 * virtqueue. This is more efficient in high-traffic cases, but it means we
855 * have to set a timer to check if any more changes have occurred. */
856static void block_vq(struct virtqueue *vq)
857{
858 struct itimerval itm;
859
860 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
861 vq->blocked = true;
862
863 itm.it_interval.tv_sec = 0;
864 itm.it_interval.tv_usec = 0;
865 itm.it_value.tv_sec = 0;
866 itm.it_value.tv_usec = timeout_usec;
867
868 setitimer(ITIMER_REAL, &itm, NULL);
869} 776}
870 777
871/* 778/*
872 * The Network 779 * The Network
873 * 780 *
874 * Handling output for network is also simple: we get all the output buffers 781 * Handling output for network is also simple: we get all the output buffers
875 * and write them (ignoring the first element) to this device's file descriptor 782 * and write them to /dev/net/tun.
876 * (/dev/net/tun).
877 */ 783 */
878static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) 784struct net_info {
785 int tunfd;
786};
787
788static void net_output(struct virtqueue *vq)
879{ 789{
880 unsigned int head, out, in, num = 0; 790 struct net_info *net_info = vq->dev->priv;
881 int len; 791 unsigned int head, out, in;
882 struct iovec iov[vq->vring.num]; 792 struct iovec iov[vq->vring.num];
883 static int last_timeout_num;
884
885 /* Keep getting output buffers from the Guest until we run out. */
886 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
887 if (in)
888 errx(1, "Input buffers in output queue?");
889 len = writev(vq->dev->fd, iov, out);
890 if (len < 0)
891 err(1, "Writing network packet to tun");
892 add_used_and_trigger(fd, vq, head, len);
893 num++;
894 }
895 793
896 /* Block further kicks and set up a timer if we saw anything. */ 794 head = wait_for_vq_desc(vq, iov, &out, &in);
897 if (!timeout && num) 795 if (in)
898 block_vq(vq); 796 errx(1, "Input buffers in net output queue?");
899 797 if (writev(net_info->tunfd, iov, out) < 0)
900 /* We never quite know how long should we wait before we check the 798 errx(1, "Write to tun failed?");
901 * queue again for more packets. We start at 500 microseconds, and if 799 add_used(vq, head, 0);
902 * we get fewer packets than last time, we assume we made the timeout
903 * too small and increase it by 10 microseconds. Otherwise, we drop it
904 * by one microsecond every time. It seems to work well enough. */
905 if (timeout) {
906 if (num < last_timeout_num)
907 timeout_usec += 10;
908 else if (timeout_usec > 1)
909 timeout_usec--;
910 last_timeout_num = num;
911 }
912} 800}
913 801
914/* This is where we handle a packet coming in from the tun device to our 802/* Will reading from this file descriptor block? */
803static bool will_block(int fd)
804{
805 fd_set fdset;
806 struct timeval zero = { 0, 0 };
807 FD_ZERO(&fdset);
808 FD_SET(fd, &fdset);
809 return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
810}
811
812/* This is where we handle packets coming in from the tun device to our
915 * Guest. */ 813 * Guest. */
916static bool handle_tun_input(int fd, struct device *dev) 814static void net_input(struct virtqueue *vq)
917{ 815{
918 unsigned int head, in_num, out_num;
919 int len; 816 int len;
920 struct iovec iov[dev->vq->vring.num]; 817 unsigned int head, out, in;
921 818 struct iovec iov[vq->vring.num];
922 /* First we need a network buffer from the Guests's recv virtqueue. */ 819 struct net_info *net_info = vq->dev->priv;
923 head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
924 if (head == dev->vq->vring.num) {
925 /* Now, it's expected that if we try to send a packet too
926 * early, the Guest won't be ready yet. Wait until the device
927 * status says it's ready. */
928 /* FIXME: Actually want DRIVER_ACTIVE here. */
929
930 /* Now tell it we want to know if new things appear. */
931 dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
932 wmb();
933
934 /* We'll turn this back on if input buffers are registered. */
935 return false;
936 } else if (out_num)
937 errx(1, "Output buffers in network recv queue?");
938
939 /* Read the packet from the device directly into the Guest's buffer. */
940 len = readv(dev->fd, iov, in_num);
941 if (len <= 0)
942 err(1, "reading network");
943 820
944 /* Tell the Guest about the new packet. */ 821 head = wait_for_vq_desc(vq, iov, &out, &in);
945 add_used_and_trigger(fd, dev->vq, head, len); 822 if (out)
823 errx(1, "Output buffers in net input queue?");
946 824
947 verbose("tun input packet len %i [%02x %02x] (%s)\n", len, 825 /* Deliver interrupt now, since we're about to sleep. */
948 ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], 826 if (vq->pending_used && will_block(net_info->tunfd))
949 head != dev->vq->vring.num ? "sent" : "discarded"); 827 trigger_irq(vq);
950 828
951 /* All good. */ 829 len = readv(net_info->tunfd, iov, in);
952 return true; 830 if (len <= 0)
831 err(1, "Failed to read from tun.");
832 add_used(vq, head, len);
953} 833}
954 834
955/*L:215 This is the callback attached to the network and console input 835/* This is the helper to create threads. */
956 * virtqueues: it ensures we try again, in case we stopped console or net 836static int do_thread(void *_vq)
957 * delivery because Guest didn't have any buffers. */
958static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
959{ 837{
960 add_device_fd(vq->dev->fd); 838 struct virtqueue *vq = _vq;
961 /* Snap the Waker out of its select loop. */ 839
962 write(waker_fds.pipe[1], "", 1); 840 for (;;)
841 vq->service(vq);
842 return 0;
963} 843}
964 844
965static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) 845/* When a child dies, we kill our entire process group with SIGTERM. This
846 * also has the side effect that the shell restores the console for us! */
847static void kill_launcher(int signal)
966{ 848{
967 /* We don't need to know again when Guest refills receive buffer. */ 849 kill(0, SIGTERM);
968 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
969 enable_fd(fd, vq, timeout);
970} 850}
971 851
972/* When the Guest tells us they updated the status field, we handle it. */ 852static void reset_device(struct device *dev)
973static void update_device_status(struct device *dev)
974{ 853{
975 struct virtqueue *vq; 854 struct virtqueue *vq;
976 855
977 /* This is a reset. */ 856 verbose("Resetting device %s\n", dev->name);
978 if (dev->desc->status == 0) { 857
979 verbose("Resetting device %s\n", dev->name); 858 /* Clear any features they've acked. */
859 memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
980 860
981 /* Clear any features they've acked. */ 861 /* We're going to be explicitly killing threads, so ignore them. */
982 memset(get_feature_bits(dev) + dev->desc->feature_len, 0, 862 signal(SIGCHLD, SIG_IGN);
983 dev->desc->feature_len);
984 863
985 /* Zero out the virtqueues. */ 864 /* Zero out the virtqueues, get rid of their threads */
986 for (vq = dev->vq; vq; vq = vq->next) { 865 for (vq = dev->vq; vq; vq = vq->next) {
987 memset(vq->vring.desc, 0, 866 if (vq->thread != (pid_t)-1) {
988 vring_size(vq->config.num, LGUEST_VRING_ALIGN)); 867 kill(vq->thread, SIGTERM);
989 lg_last_avail(vq) = 0; 868 waitpid(vq->thread, NULL, 0);
869 vq->thread = (pid_t)-1;
990 } 870 }
991 } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { 871 memset(vq->vring.desc, 0,
872 vring_size(vq->config.num, LGUEST_VRING_ALIGN));
873 lg_last_avail(vq) = 0;
874 }
875 dev->running = false;
876
877 /* Now we care if threads die. */
878 signal(SIGCHLD, (void *)kill_launcher);
879}
880
881static void create_thread(struct virtqueue *vq)
882{
883 /* Create stack for thread and run it. Since stack grows
884 * upwards, we point the stack pointer to the end of this
885 * region. */
886 char *stack = malloc(32768);
887 unsigned long args[] = { LHREQ_EVENTFD,
888 vq->config.pfn*getpagesize(), 0 };
889
890 /* Create a zero-initialized eventfd. */
891 vq->eventfd = eventfd(0, 0);
892 if (vq->eventfd < 0)
893 err(1, "Creating eventfd");
894 args[2] = vq->eventfd;
895
896 /* Attach an eventfd to this virtqueue: it will go off
897 * when the Guest does an LHCALL_NOTIFY for this vq. */
898 if (write(lguest_fd, &args, sizeof(args)) != 0)
899 err(1, "Attaching eventfd");
900
901 /* CLONE_VM: because it has to access the Guest memory, and
902 * SIGCHLD so we get a signal if it dies. */
903 vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
904 if (vq->thread == (pid_t)-1)
905 err(1, "Creating clone");
906 /* We close our local copy, now the child has it. */
907 close(vq->eventfd);
908}
909
910static void start_device(struct device *dev)
911{
912 unsigned int i;
913 struct virtqueue *vq;
914
915 verbose("Device %s OK: offered", dev->name);
916 for (i = 0; i < dev->feature_len; i++)
917 verbose(" %02x", get_feature_bits(dev)[i]);
918 verbose(", accepted");
919 for (i = 0; i < dev->feature_len; i++)
920 verbose(" %02x", get_feature_bits(dev)
921 [dev->feature_len+i]);
922
923 for (vq = dev->vq; vq; vq = vq->next) {
924 if (vq->service)
925 create_thread(vq);
926 }
927 dev->running = true;
928}
929
930static void cleanup_devices(void)
931{
932 struct device *dev;
933
934 for (dev = devices.dev; dev; dev = dev->next)
935 reset_device(dev);
936
937 /* If we saved off the original terminal settings, restore them now. */
938 if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
939 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
940}
941
942/* When the Guest tells us they updated the status field, we handle it. */
943static void update_device_status(struct device *dev)
944{
945 /* A zero status is a reset, otherwise it's a set of flags. */
946 if (dev->desc->status == 0)
947 reset_device(dev);
948 else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
992 warnx("Device %s configuration FAILED", dev->name); 949 warnx("Device %s configuration FAILED", dev->name);
950 if (dev->running)
951 reset_device(dev);
993 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 952 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
994 unsigned int i; 953 if (!dev->running)
995 954 start_device(dev);
996 verbose("Device %s OK: offered", dev->name);
997 for (i = 0; i < dev->desc->feature_len; i++)
998 verbose(" %02x", get_feature_bits(dev)[i]);
999 verbose(", accepted");
1000 for (i = 0; i < dev->desc->feature_len; i++)
1001 verbose(" %02x", get_feature_bits(dev)
1002 [dev->desc->feature_len+i]);
1003
1004 if (dev->ready)
1005 dev->ready(dev);
1006 } 955 }
1007} 956}
1008 957
1009/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ 958/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
1010static void handle_output(int fd, unsigned long addr) 959static void handle_output(unsigned long addr)
1011{ 960{
1012 struct device *i; 961 struct device *i;
1013 struct virtqueue *vq;
1014 962
1015 /* Check each device and virtqueue. */ 963 /* Check each device. */
1016 for (i = devices.dev; i; i = i->next) { 964 for (i = devices.dev; i; i = i->next) {
965 struct virtqueue *vq;
966
1017 /* Notifications to device descriptors update device status. */ 967 /* Notifications to device descriptors update device status. */
1018 if (from_guest_phys(addr) == i->desc) { 968 if (from_guest_phys(addr) == i->desc) {
1019 update_device_status(i); 969 update_device_status(i);
1020 return; 970 return;
1021 } 971 }
1022 972
1023 /* Notifications to virtqueues mean output has occurred. */ 973 /* Devices *can* be used before status is set to DRIVER_OK. */
1024 for (vq = i->vq; vq; vq = vq->next) { 974 for (vq = i->vq; vq; vq = vq->next) {
1025 if (vq->config.pfn != addr/getpagesize()) 975 if (addr != vq->config.pfn*getpagesize())
1026 continue; 976 continue;
1027 977 if (i->running)
1028 /* Guest should acknowledge (and set features!) before 978 errx(1, "Notification on running %s", i->name);
1029 * using the device. */ 979 start_device(i);
1030 if (i->desc->status == 0) {
1031 warnx("%s gave early output", i->name);
1032 return;
1033 }
1034
1035 if (strcmp(vq->dev->name, "console") != 0)
1036 verbose("Output to %s\n", vq->dev->name);
1037 if (vq->handle_output)
1038 vq->handle_output(fd, vq, false);
1039 return; 980 return;
1040 } 981 }
1041 } 982 }
@@ -1049,71 +990,6 @@ static void handle_output(int fd, unsigned long addr)
1049 strnlen(from_guest_phys(addr), guest_limit - addr)); 990 strnlen(from_guest_phys(addr), guest_limit - addr));
1050} 991}
1051 992
1052static void handle_timeout(int fd)
1053{
1054 char buf[32];
1055 struct device *i;
1056 struct virtqueue *vq;
1057
1058 /* Clear the pipe */
1059 read(timeoutpipe[0], buf, sizeof(buf));
1060
1061 /* Check each device and virtqueue: flush blocked ones. */
1062 for (i = devices.dev; i; i = i->next) {
1063 for (vq = i->vq; vq; vq = vq->next) {
1064 if (!vq->blocked)
1065 continue;
1066
1067 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
1068 vq->blocked = false;
1069 if (vq->handle_output)
1070 vq->handle_output(fd, vq, true);
1071 }
1072 }
1073}
1074
1075/* This is called when the Waker wakes us up: check for incoming file
1076 * descriptors. */
1077static void handle_input(int fd)
1078{
1079 /* select() wants a zeroed timeval to mean "don't wait". */
1080 struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
1081
1082 for (;;) {
1083 struct device *i;
1084 fd_set fds = devices.infds;
1085 int num;
1086
1087 num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
1088 /* Could get interrupted */
1089 if (num < 0)
1090 continue;
1091 /* If nothing is ready, we're done. */
1092 if (num == 0)
1093 break;
1094
1095 /* Otherwise, call the device(s) which have readable file
1096 * descriptors and a method of handling them. */
1097 for (i = devices.dev; i; i = i->next) {
1098 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1099 if (i->handle_input(fd, i))
1100 continue;
1101
1102 /* If handle_input() returns false, it means we
1103 * should no longer service it. Networking and
1104 * console do this when there's no input
1105 * buffers to deliver into. Console also uses
1106 * it when it discovers that stdin is closed. */
1107 FD_CLR(i->fd, &devices.infds);
1108 }
1109 }
1110
1111 /* Is this the timeout fd? */
1112 if (FD_ISSET(timeoutpipe[0], &fds))
1113 handle_timeout(fd);
1114 }
1115}
1116
1117/*L:190 993/*L:190
1118 * Device Setup 994 * Device Setup
1119 * 995 *
@@ -1129,8 +1005,8 @@ static void handle_input(int fd)
1129static u8 *device_config(const struct device *dev) 1005static u8 *device_config(const struct device *dev)
1130{ 1006{
1131 return (void *)(dev->desc + 1) 1007 return (void *)(dev->desc + 1)
1132 + dev->desc->num_vq * sizeof(struct lguest_vqconfig) 1008 + dev->num_vq * sizeof(struct lguest_vqconfig)
1133 + dev->desc->feature_len * 2; 1009 + dev->feature_len * 2;
1134} 1010}
1135 1011
1136/* This routine allocates a new "struct lguest_device_desc" from descriptor 1012/* This routine allocates a new "struct lguest_device_desc" from descriptor
@@ -1159,7 +1035,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
1159/* Each device descriptor is followed by the description of its virtqueues. We 1035/* Each device descriptor is followed by the description of its virtqueues. We
1160 * specify how many descriptors the virtqueue is to have. */ 1036 * specify how many descriptors the virtqueue is to have. */
1161static void add_virtqueue(struct device *dev, unsigned int num_descs, 1037static void add_virtqueue(struct device *dev, unsigned int num_descs,
1162 void (*handle_output)(int, struct virtqueue *, bool)) 1038 void (*service)(struct virtqueue *))
1163{ 1039{
1164 unsigned int pages; 1040 unsigned int pages;
1165 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1041 struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1174,8 +1050,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1174 vq->next = NULL; 1050 vq->next = NULL;
1175 vq->last_avail_idx = 0; 1051 vq->last_avail_idx = 0;
1176 vq->dev = dev; 1052 vq->dev = dev;
1177 vq->inflight = 0; 1053 vq->service = service;
1178 vq->blocked = false; 1054 vq->thread = (pid_t)-1;
1179 1055
1180 /* Initialize the configuration. */ 1056 /* Initialize the configuration. */
1181 vq->config.num = num_descs; 1057 vq->config.num = num_descs;
@@ -1191,6 +1067,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1191 * yet, otherwise we'd be overwriting them. */ 1067 * yet, otherwise we'd be overwriting them. */
1192 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); 1068 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
1193 memcpy(device_config(dev), &vq->config, sizeof(vq->config)); 1069 memcpy(device_config(dev), &vq->config, sizeof(vq->config));
1070 dev->num_vq++;
1194 dev->desc->num_vq++; 1071 dev->desc->num_vq++;
1195 1072
1196 verbose("Virtqueue page %#lx\n", to_guest_phys(p)); 1073 verbose("Virtqueue page %#lx\n", to_guest_phys(p));
@@ -1199,15 +1076,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1199 * second. */ 1076 * second. */
1200 for (i = &dev->vq; *i; i = &(*i)->next); 1077 for (i = &dev->vq; *i; i = &(*i)->next);
1201 *i = vq; 1078 *i = vq;
1202
1203 /* Set the routine to call when the Guest does something to this
1204 * virtqueue. */
1205 vq->handle_output = handle_output;
1206
1207 /* As an optimization, set the advisory "Don't Notify Me" flag if we
1208 * don't have a handler */
1209 if (!handle_output)
1210 vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
1211} 1079}
1212 1080
1213/* The first half of the feature bitmask is for us to advertise features. The 1081/* The first half of the feature bitmask is for us to advertise features. The
@@ -1219,7 +1087,7 @@ static void add_feature(struct device *dev, unsigned bit)
1219 /* We can't extend the feature bits once we've added config bytes */ 1087 /* We can't extend the feature bits once we've added config bytes */
1220 if (dev->desc->feature_len <= bit / CHAR_BIT) { 1088 if (dev->desc->feature_len <= bit / CHAR_BIT) {
1221 assert(dev->desc->config_len == 0); 1089 assert(dev->desc->config_len == 0);
1222 dev->desc->feature_len = (bit / CHAR_BIT) + 1; 1090 dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
1223 } 1091 }
1224 1092
1225 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); 1093 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
@@ -1243,22 +1111,17 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
1243 * calling new_dev_desc() to allocate the descriptor and device memory. 1111 * calling new_dev_desc() to allocate the descriptor and device memory.
1244 * 1112 *
1245 * See what I mean about userspace being boring? */ 1113 * See what I mean about userspace being boring? */
1246static struct device *new_device(const char *name, u16 type, int fd, 1114static struct device *new_device(const char *name, u16 type)
1247 bool (*handle_input)(int, struct device *))
1248{ 1115{
1249 struct device *dev = malloc(sizeof(*dev)); 1116 struct device *dev = malloc(sizeof(*dev));
1250 1117
1251 /* Now we populate the fields one at a time. */ 1118 /* Now we populate the fields one at a time. */
1252 dev->fd = fd;
1253 /* If we have an input handler for this file descriptor, then we add it
1254 * to the device_list's fdset and maxfd. */
1255 if (handle_input)
1256 add_device_fd(dev->fd);
1257 dev->desc = new_dev_desc(type); 1119 dev->desc = new_dev_desc(type);
1258 dev->handle_input = handle_input;
1259 dev->name = name; 1120 dev->name = name;
1260 dev->vq = NULL; 1121 dev->vq = NULL;
1261 dev->ready = NULL; 1122 dev->feature_len = 0;
1123 dev->num_vq = 0;
1124 dev->running = false;
1262 1125
1263 /* Append to device list. Prepending to a single-linked list is 1126 /* Append to device list. Prepending to a single-linked list is
1264 * easier, but the user expects the devices to be arranged on the bus 1127 * easier, but the user expects the devices to be arranged on the bus
@@ -1286,13 +1149,10 @@ static void setup_console(void)
1286 * raw input stream to the Guest. */ 1149 * raw input stream to the Guest. */
1287 term.c_lflag &= ~(ISIG|ICANON|ECHO); 1150 term.c_lflag &= ~(ISIG|ICANON|ECHO);
1288 tcsetattr(STDIN_FILENO, TCSANOW, &term); 1151 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1289 /* If we exit gracefully, the original settings will be
1290 * restored so the user can see what they're typing. */
1291 atexit(restore_term);
1292 } 1152 }
1293 1153
1294 dev = new_device("console", VIRTIO_ID_CONSOLE, 1154 dev = new_device("console", VIRTIO_ID_CONSOLE);
1295 STDIN_FILENO, handle_console_input); 1155
1296 /* We store the console state in dev->priv, and initialize it. */ 1156 /* We store the console state in dev->priv, and initialize it. */
1297 dev->priv = malloc(sizeof(struct console_abort)); 1157 dev->priv = malloc(sizeof(struct console_abort));
1298 ((struct console_abort *)dev->priv)->count = 0; 1158 ((struct console_abort *)dev->priv)->count = 0;
@@ -1301,31 +1161,13 @@ static void setup_console(void)
1301 * they put something the input queue, we make sure we're listening to 1161 * they put something the input queue, we make sure we're listening to
1302 * stdin. When they put something in the output queue, we write it to 1162 * stdin. When they put something in the output queue, we write it to
1303 * stdout. */ 1163 * stdout. */
1304 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); 1164 add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
1305 add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); 1165 add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
1306 1166
1307 verbose("device %u: console\n", devices.device_num++); 1167 verbose("device %u: console\n", ++devices.device_num);
1308} 1168}
1309/*:*/ 1169/*:*/
1310 1170
1311static void timeout_alarm(int sig)
1312{
1313 write(timeoutpipe[1], "", 1);
1314}
1315
1316static void setup_timeout(void)
1317{
1318 if (pipe(timeoutpipe) != 0)
1319 err(1, "Creating timeout pipe");
1320
1321 if (fcntl(timeoutpipe[1], F_SETFL,
1322 fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
1323 err(1, "Making timeout pipe nonblocking");
1324
1325 add_device_fd(timeoutpipe[0]);
1326 signal(SIGALRM, timeout_alarm);
1327}
1328
1329/*M:010 Inter-guest networking is an interesting area. Simplest is to have a 1171/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
1330 * --sharenet=<name> option which opens or creates a named pipe. This can be 1172 * --sharenet=<name> option which opens or creates a named pipe. This can be
1331 * used to send packets to another guest in a 1:1 manner. 1173 * used to send packets to another guest in a 1:1 manner.
@@ -1447,21 +1289,23 @@ static int get_tun_device(char tapif[IFNAMSIZ])
1447static void setup_tun_net(char *arg) 1289static void setup_tun_net(char *arg)
1448{ 1290{
1449 struct device *dev; 1291 struct device *dev;
1450 int netfd, ipfd; 1292 struct net_info *net_info = malloc(sizeof(*net_info));
1293 int ipfd;
1451 u32 ip = INADDR_ANY; 1294 u32 ip = INADDR_ANY;
1452 bool bridging = false; 1295 bool bridging = false;
1453 char tapif[IFNAMSIZ], *p; 1296 char tapif[IFNAMSIZ], *p;
1454 struct virtio_net_config conf; 1297 struct virtio_net_config conf;
1455 1298
1456 netfd = get_tun_device(tapif); 1299 net_info->tunfd = get_tun_device(tapif);
1457 1300
1458 /* First we create a new network device. */ 1301 /* First we create a new network device. */
1459 dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); 1302 dev = new_device("net", VIRTIO_ID_NET);
1303 dev->priv = net_info;
1460 1304
1461 /* Network devices need a receive and a send queue, just like 1305 /* Network devices need a receive and a send queue, just like
1462 * console. */ 1306 * console. */
1463 add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd); 1307 add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
1464 add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); 1308 add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
1465 1309
1466 /* We need a socket to perform the magic network ioctls to bring up the 1310 /* We need a socket to perform the magic network ioctls to bring up the
1467 * tap interface, connect to the bridge etc. Any socket will do! */ 1311 * tap interface, connect to the bridge etc. Any socket will do! */
@@ -1502,6 +1346,8 @@ static void setup_tun_net(char *arg)
1502 add_feature(dev, VIRTIO_NET_F_HOST_TSO4); 1346 add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
1503 add_feature(dev, VIRTIO_NET_F_HOST_TSO6); 1347 add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
1504 add_feature(dev, VIRTIO_NET_F_HOST_ECN); 1348 add_feature(dev, VIRTIO_NET_F_HOST_ECN);
1349 /* We handle indirect ring entries */
1350 add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
1505 set_config(dev, sizeof(conf), &conf); 1351 set_config(dev, sizeof(conf), &conf);
1506 1352
1507 /* We don't need the socket any more; setup is done. */ 1353 /* We don't need the socket any more; setup is done. */
@@ -1550,20 +1396,18 @@ struct vblk_info
1550 * Remember that the block device is handled by a separate I/O thread. We head 1396 * Remember that the block device is handled by a separate I/O thread. We head
1551 * straight into the core of that thread here: 1397 * straight into the core of that thread here:
1552 */ 1398 */
1553static bool service_io(struct device *dev) 1399static void blk_request(struct virtqueue *vq)
1554{ 1400{
1555 struct vblk_info *vblk = dev->priv; 1401 struct vblk_info *vblk = vq->dev->priv;
1556 unsigned int head, out_num, in_num, wlen; 1402 unsigned int head, out_num, in_num, wlen;
1557 int ret; 1403 int ret;
1558 u8 *in; 1404 u8 *in;
1559 struct virtio_blk_outhdr *out; 1405 struct virtio_blk_outhdr *out;
1560 struct iovec iov[dev->vq->vring.num]; 1406 struct iovec iov[vq->vring.num];
1561 off64_t off; 1407 off64_t off;
1562 1408
1563 /* See if there's a request waiting. If not, nothing to do. */ 1409 /* Get the next request. */
1564 head = get_vq_desc(dev->vq, iov, &out_num, &in_num); 1410 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1565 if (head == dev->vq->vring.num)
1566 return false;
1567 1411
1568 /* Every block request should contain at least one output buffer 1412 /* Every block request should contain at least one output buffer
1569 * (detailing the location on disk and the type of request) and one 1413 * (detailing the location on disk and the type of request) and one
@@ -1630,83 +1474,28 @@ static bool service_io(struct device *dev)
1630 } 1474 }
1631 } 1475 }
1632 1476
1633 /* We can't trigger an IRQ, because we're not the Launcher. It does 1477 /* OK, so we noted that it was pretty poor to use an fdatasync as a
1634 * that when we tell it we're done. */ 1478 * barrier. But Christoph Hellwig points out that we need a sync
1635 add_used(dev->vq, head, wlen); 1479 * *afterwards* as well: "Barriers specify no reordering to the front
1636 return true; 1480 * or the back." And Jens Axboe confirmed it, so here we are: */
1637} 1481 if (out->type & VIRTIO_BLK_T_BARRIER)
1638 1482 fdatasync(vblk->fd);
1639/* This is the thread which actually services the I/O. */
1640static int io_thread(void *_dev)
1641{
1642 struct device *dev = _dev;
1643 struct vblk_info *vblk = dev->priv;
1644 char c;
1645
1646 /* Close other side of workpipe so we get 0 read when main dies. */
1647 close(vblk->workpipe[1]);
1648 /* Close the other side of the done_fd pipe. */
1649 close(dev->fd);
1650
1651 /* When this read fails, it means Launcher died, so we follow. */
1652 while (read(vblk->workpipe[0], &c, 1) == 1) {
1653 /* We acknowledge each request immediately to reduce latency,
1654 * rather than waiting until we've done them all. I haven't
1655 * measured to see if it makes any difference.
1656 *
1657 * That would be an interesting test, wouldn't it? You could
1658 * also try having more than one I/O thread. */
1659 while (service_io(dev))
1660 write(vblk->done_fd, &c, 1);
1661 }
1662 return 0;
1663}
1664
1665/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1666 * when that thread tells us it's completed some I/O. */
1667static bool handle_io_finish(int fd, struct device *dev)
1668{
1669 char c;
1670
1671 /* If the I/O thread died, presumably it printed the error, so we
1672 * simply exit. */
1673 if (read(dev->fd, &c, 1) != 1)
1674 exit(1);
1675
1676 /* It did some work, so trigger the irq. */
1677 trigger_irq(fd, dev->vq);
1678 return true;
1679}
1680
1681/* When the Guest submits some I/O, we just need to wake the I/O thread. */
1682static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
1683{
1684 struct vblk_info *vblk = vq->dev->priv;
1685 char c = 0;
1686 1483
1687 /* Wake up I/O thread and tell it to go to work! */ 1484 add_used(vq, head, wlen);
1688 if (write(vblk->workpipe[1], &c, 1) != 1)
1689 /* Presumably it indicated why it died. */
1690 exit(1);
1691} 1485}
1692 1486
1693/*L:198 This actually sets up a virtual block device. */ 1487/*L:198 This actually sets up a virtual block device. */
1694static void setup_block_file(const char *filename) 1488static void setup_block_file(const char *filename)
1695{ 1489{
1696 int p[2];
1697 struct device *dev; 1490 struct device *dev;
1698 struct vblk_info *vblk; 1491 struct vblk_info *vblk;
1699 void *stack;
1700 struct virtio_blk_config conf; 1492 struct virtio_blk_config conf;
1701 1493
1702 /* This is the pipe the I/O thread will use to tell us I/O is done. */
1703 pipe(p);
1704
1705 /* The device responds to return from I/O thread. */ 1494 /* The device responds to return from I/O thread. */
1706 dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); 1495 dev = new_device("block", VIRTIO_ID_BLOCK);
1707 1496
1708 /* The device has one virtqueue, where the Guest places requests. */ 1497 /* The device has one virtqueue, where the Guest places requests. */
1709 add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); 1498 add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
1710 1499
1711 /* Allocate the room for our own bookkeeping */ 1500 /* Allocate the room for our own bookkeeping */
1712 vblk = dev->priv = malloc(sizeof(*vblk)); 1501 vblk = dev->priv = malloc(sizeof(*vblk));
@@ -1728,49 +1517,29 @@ static void setup_block_file(const char *filename)
1728 1517
1729 set_config(dev, sizeof(conf), &conf); 1518 set_config(dev, sizeof(conf), &conf);
1730 1519
1731 /* The I/O thread writes to this end of the pipe when done. */
1732 vblk->done_fd = p[1];
1733
1734 /* This is the second pipe, which is how we tell the I/O thread about
1735 * more work. */
1736 pipe(vblk->workpipe);
1737
1738 /* Create stack for thread and run it. Since stack grows upwards, we
1739 * point the stack pointer to the end of this region. */
1740 stack = malloc(32768);
1741 /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
1742 * becoming a zombie. */
1743 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
1744 err(1, "Creating clone");
1745
1746 /* We don't need to keep the I/O thread's end of the pipes open. */
1747 close(vblk->done_fd);
1748 close(vblk->workpipe[0]);
1749
1750 verbose("device %u: virtblock %llu sectors\n", 1520 verbose("device %u: virtblock %llu sectors\n",
1751 devices.device_num, le64_to_cpu(conf.capacity)); 1521 ++devices.device_num, le64_to_cpu(conf.capacity));
1752} 1522}
1753 1523
1524struct rng_info {
1525 int rfd;
1526};
1527
1754/* Our random number generator device reads from /dev/random into the Guest's 1528/* Our random number generator device reads from /dev/random into the Guest's
1755 * input buffers. The usual case is that the Guest doesn't want random numbers 1529 * input buffers. The usual case is that the Guest doesn't want random numbers
1756 * and so has no buffers although /dev/random is still readable, whereas 1530 * and so has no buffers although /dev/random is still readable, whereas
1757 * console is the reverse. 1531 * console is the reverse.
1758 * 1532 *
1759 * The same logic applies, however. */ 1533 * The same logic applies, however. */
1760static bool handle_rng_input(int fd, struct device *dev) 1534static void rng_input(struct virtqueue *vq)
1761{ 1535{
1762 int len; 1536 int len;
1763 unsigned int head, in_num, out_num, totlen = 0; 1537 unsigned int head, in_num, out_num, totlen = 0;
1764 struct iovec iov[dev->vq->vring.num]; 1538 struct rng_info *rng_info = vq->dev->priv;
1539 struct iovec iov[vq->vring.num];
1765 1540
1766 /* First we need a buffer from the Guests's virtqueue. */ 1541 /* First we need a buffer from the Guests's virtqueue. */
1767 head = get_vq_desc(dev->vq, iov, &out_num, &in_num); 1542 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1768
1769 /* If they're not ready for input, stop listening to this file
1770 * descriptor. We'll start again once they add an input buffer. */
1771 if (head == dev->vq->vring.num)
1772 return false;
1773
1774 if (out_num) 1543 if (out_num)
1775 errx(1, "Output buffers in rng?"); 1544 errx(1, "Output buffers in rng?");
1776 1545
@@ -1778,7 +1547,7 @@ static bool handle_rng_input(int fd, struct device *dev)
1778 * it reads straight into the Guest's buffer. We loop to make sure we 1547 * it reads straight into the Guest's buffer. We loop to make sure we
1779 * fill it. */ 1548 * fill it. */
1780 while (!iov_empty(iov, in_num)) { 1549 while (!iov_empty(iov, in_num)) {
1781 len = readv(dev->fd, iov, in_num); 1550 len = readv(rng_info->rfd, iov, in_num);
1782 if (len <= 0) 1551 if (len <= 0)
1783 err(1, "Read from /dev/random gave %i", len); 1552 err(1, "Read from /dev/random gave %i", len);
1784 iov_consume(iov, in_num, len); 1553 iov_consume(iov, in_num, len);
@@ -1786,25 +1555,23 @@ static bool handle_rng_input(int fd, struct device *dev)
1786 } 1555 }
1787 1556
1788 /* Tell the Guest about the new input. */ 1557 /* Tell the Guest about the new input. */
1789 add_used_and_trigger(fd, dev->vq, head, totlen); 1558 add_used(vq, head, totlen);
1790
1791 /* Everything went OK! */
1792 return true;
1793} 1559}
1794 1560
1795/* And this creates a "hardware" random number device for the Guest. */ 1561/* And this creates a "hardware" random number device for the Guest. */
1796static void setup_rng(void) 1562static void setup_rng(void)
1797{ 1563{
1798 struct device *dev; 1564 struct device *dev;
1799 int fd; 1565 struct rng_info *rng_info = malloc(sizeof(*rng_info));
1800 1566
1801 fd = open_or_die("/dev/random", O_RDONLY); 1567 rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
1802 1568
1803 /* The device responds to return from I/O thread. */ 1569 /* The device responds to return from I/O thread. */
1804 dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input); 1570 dev = new_device("rng", VIRTIO_ID_RNG);
1571 dev->priv = rng_info;
1805 1572
1806 /* The device has one virtqueue, where the Guest places inbufs. */ 1573 /* The device has one virtqueue, where the Guest places inbufs. */
1807 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); 1574 add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
1808 1575
1809 verbose("device %u: rng\n", devices.device_num++); 1576 verbose("device %u: rng\n", devices.device_num++);
1810} 1577}
@@ -1820,17 +1587,18 @@ static void __attribute__((noreturn)) restart_guest(void)
1820 for (i = 3; i < FD_SETSIZE; i++) 1587 for (i = 3; i < FD_SETSIZE; i++)
1821 close(i); 1588 close(i);
1822 1589
1823 /* The exec automatically gets rid of the I/O and Waker threads. */ 1590 /* Reset all the devices (kills all threads). */
1591 cleanup_devices();
1592
1824 execv(main_args[0], main_args); 1593 execv(main_args[0], main_args);
1825 err(1, "Could not exec %s", main_args[0]); 1594 err(1, "Could not exec %s", main_args[0]);
1826} 1595}
1827 1596
1828/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves 1597/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
1829 * its input and output, and finally, lays it to rest. */ 1598 * its input and output, and finally, lays it to rest. */
1830static void __attribute__((noreturn)) run_guest(int lguest_fd) 1599static void __attribute__((noreturn)) run_guest(void)
1831{ 1600{
1832 for (;;) { 1601 for (;;) {
1833 unsigned long args[] = { LHREQ_BREAK, 0 };
1834 unsigned long notify_addr; 1602 unsigned long notify_addr;
1835 int readval; 1603 int readval;
1836 1604
@@ -1841,8 +1609,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1841 /* One unsigned long means the Guest did HCALL_NOTIFY */ 1609 /* One unsigned long means the Guest did HCALL_NOTIFY */
1842 if (readval == sizeof(notify_addr)) { 1610 if (readval == sizeof(notify_addr)) {
1843 verbose("Notify on address %#lx\n", notify_addr); 1611 verbose("Notify on address %#lx\n", notify_addr);
1844 handle_output(lguest_fd, notify_addr); 1612 handle_output(notify_addr);
1845 continue;
1846 /* ENOENT means the Guest died. Reading tells us why. */ 1613 /* ENOENT means the Guest died. Reading tells us why. */
1847 } else if (errno == ENOENT) { 1614 } else if (errno == ENOENT) {
1848 char reason[1024] = { 0 }; 1615 char reason[1024] = { 0 };
@@ -1851,19 +1618,9 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1851 /* ERESTART means that we need to reboot the guest */ 1618 /* ERESTART means that we need to reboot the guest */
1852 } else if (errno == ERESTART) { 1619 } else if (errno == ERESTART) {
1853 restart_guest(); 1620 restart_guest();
1854 /* EAGAIN means a signal (timeout). 1621 /* Anything else means a bug or incompatible change. */
1855 * Anything else means a bug or incompatible change. */ 1622 } else
1856 } else if (errno != EAGAIN)
1857 err(1, "Running guest failed"); 1623 err(1, "Running guest failed");
1858
1859 /* Only service input on thread for CPU 0. */
1860 if (cpu_id != 0)
1861 continue;
1862
1863 /* Service input, then unset the BREAK to release the Waker. */
1864 handle_input(lguest_fd);
1865 if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
1866 err(1, "Resetting break");
1867 } 1624 }
1868} 1625}
1869/*L:240 1626/*L:240
@@ -1897,8 +1654,8 @@ int main(int argc, char *argv[])
1897 /* Memory, top-level pagetable, code startpoint and size of the 1654 /* Memory, top-level pagetable, code startpoint and size of the
1898 * (optional) initrd. */ 1655 * (optional) initrd. */
1899 unsigned long mem = 0, start, initrd_size = 0; 1656 unsigned long mem = 0, start, initrd_size = 0;
1900 /* Two temporaries and the /dev/lguest file descriptor. */ 1657 /* Two temporaries. */
1901 int i, c, lguest_fd; 1658 int i, c;
1902 /* The boot information for the Guest. */ 1659 /* The boot information for the Guest. */
1903 struct boot_params *boot; 1660 struct boot_params *boot;
1904 /* If they specify an initrd file to load. */ 1661 /* If they specify an initrd file to load. */
@@ -1906,18 +1663,10 @@ int main(int argc, char *argv[])
1906 1663
1907 /* Save the args: we "reboot" by execing ourselves again. */ 1664 /* Save the args: we "reboot" by execing ourselves again. */
1908 main_args = argv; 1665 main_args = argv;
1909 /* We don't "wait" for the children, so prevent them from becoming
1910 * zombies. */
1911 signal(SIGCHLD, SIG_IGN);
1912 1666
1913 /* First we initialize the device list. Since console and network 1667 /* First we initialize the device list. We keep a pointer to the last
1914 * device receive input from a file descriptor, we keep an fdset 1668 * device, and the next interrupt number to use for devices (1:
1915 * (infds) and the maximum fd number (max_infd) with the head of the 1669 * remember that 0 is used by the timer). */
1916 * list. We also keep a pointer to the last device. Finally, we keep
1917 * the next interrupt number to use for devices (1: remember that 0 is
1918 * used by the timer). */
1919 FD_ZERO(&devices.infds);
1920 devices.max_infd = -1;
1921 devices.lastdev = NULL; 1670 devices.lastdev = NULL;
1922 devices.next_irq = 1; 1671 devices.next_irq = 1;
1923 1672
@@ -1975,9 +1724,6 @@ int main(int argc, char *argv[])
1975 /* We always have a console device */ 1724 /* We always have a console device */
1976 setup_console(); 1725 setup_console();
1977 1726
1978 /* We can timeout waiting for Guest network transmit. */
1979 setup_timeout();
1980
1981 /* Now we load the kernel */ 1727 /* Now we load the kernel */
1982 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); 1728 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
1983 1729
@@ -2016,15 +1762,16 @@ int main(int argc, char *argv[])
2016 1762
2017 /* We tell the kernel to initialize the Guest: this returns the open 1763 /* We tell the kernel to initialize the Guest: this returns the open
2018 * /dev/lguest file descriptor. */ 1764 * /dev/lguest file descriptor. */
2019 lguest_fd = tell_kernel(start); 1765 tell_kernel(start);
1766
1767 /* Ensure that we terminate if a child dies. */
1768 signal(SIGCHLD, kill_launcher);
2020 1769
2021 /* We clone off a thread, which wakes the Launcher whenever one of the 1770 /* If we exit via err(), this kills all the threads, restores tty. */
2022 * input file descriptors needs attention. We call this the Waker, and 1771 atexit(cleanup_devices);
2023 * we'll cover it in a moment. */
2024 setup_waker(lguest_fd);
2025 1772
2026 /* Finally, run the Guest. This doesn't return. */ 1773 /* Finally, run the Guest. This doesn't return. */
2027 run_guest(lguest_fd); 1774 run_guest();
2028} 1775}
2029/*:*/ 1776/*:*/
2030 1777
diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt
index 29510dc51510..efb3a6a045a2 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -3,11 +3,11 @@
3 /, /` - or, A Young Coder's Illustrated Hypervisor 3 /, /` - or, A Young Coder's Illustrated Hypervisor
4 \\"--\\ http://lguest.ozlabs.org 4 \\"--\\ http://lguest.ozlabs.org
5 5
6Lguest is designed to be a minimal hypervisor for the Linux kernel, for 6Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel,
7Linux developers and users to experiment with virtualization with the 7for Linux developers and users to experiment with virtualization with the
8minimum of complexity. Nonetheless, it should have sufficient 8minimum of complexity. Nonetheless, it should have sufficient features to
9features to make it useful for specific tasks, and, of course, you are 9make it useful for specific tasks, and, of course, you are encouraged to fork
10encouraged to fork and enhance it (see drivers/lguest/README). 10and enhance it (see drivers/lguest/README).
11 11
12Features: 12Features:
13 13
diff --git a/Documentation/local_ops.txt b/Documentation/local_ops.txt
index 23045b8b50f0..300da4bdfdbd 100644
--- a/Documentation/local_ops.txt
+++ b/Documentation/local_ops.txt
@@ -34,7 +34,7 @@ out of order wrt other memory writes by the owner CPU.
34 34
35It can be done by slightly modifying the standard atomic operations : only 35It can be done by slightly modifying the standard atomic operations : only
36their UP variant must be kept. It typically means removing LOCK prefix (on 36their UP variant must be kept. It typically means removing LOCK prefix (on
37i386 and x86_64) and any SMP sychronization barrier. If the architecture does 37i386 and x86_64) and any SMP synchronization barrier. If the architecture does
38not have a different behavior between SMP and UP, including asm-generic/local.h 38not have a different behavior between SMP and UP, including asm-generic/local.h
39in your architecture's local.h is sufficient. 39in your architecture's local.h is sufficient.
40 40
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index 488773018152..e20d913d5914 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -27,33 +27,37 @@ lock-class.
27State 27State
28----- 28-----
29 29
30The validator tracks lock-class usage history into 5 separate state bits: 30The validator tracks lock-class usage history into 4n + 1 separate state bits:
31 31
32- 'ever held in hardirq context' [ == hardirq-safe ] 32- 'ever held in STATE context'
33- 'ever held in softirq context' [ == softirq-safe ] 33- 'ever head as readlock in STATE context'
34- 'ever held with hardirqs enabled' [ == hardirq-unsafe ] 34- 'ever head with STATE enabled'
35- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ] 35- 'ever head as readlock with STATE enabled'
36
37Where STATE can be either one of (kernel/lockdep_states.h)
38 - hardirq
39 - softirq
40 - reclaim_fs
36 41
37- 'ever used' [ == !unused ] 42- 'ever used' [ == !unused ]
38 43
39When locking rules are violated, these 4 state bits are presented in the 44When locking rules are violated, these state bits are presented in the
40locking error messages, inside curlies. A contrived example: 45locking error messages, inside curlies. A contrived example:
41 46
42 modprobe/2287 is trying to acquire lock: 47 modprobe/2287 is trying to acquire lock:
43 (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 48 (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
44 49
45 but task is already holding lock: 50 but task is already holding lock:
46 (&sio_locks[i].lock){--..}, at: [<c02867fd>] mutex_lock+0x21/0x24 51 (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
47 52
48 53
49The bit position indicates hardirq, softirq, hardirq-read, 54The bit position indicates STATE, STATE-read, for each of the states listed
50softirq-read respectively, and the character displayed in each 55above, and the character displayed in each indicates:
51indicates:
52 56
53 '.' acquired while irqs disabled 57 '.' acquired while irqs disabled and not in irq context
54 '+' acquired in irq context 58 '-' acquired in irq context
55 '-' acquired with irqs enabled 59 '+' acquired with irqs enabled
56 '?' read acquired in irq context with irqs enabled. 60 '?' acquired in irq context with irqs enabled.
57 61
58Unused mutexes cannot be part of the cause of an error. 62Unused mutexes cannot be part of the cause of an error.
59 63
diff --git a/Documentation/logo.gif b/Documentation/logo.gif
new file mode 100644
index 000000000000..2eae75fecfb9
--- /dev/null
+++ b/Documentation/logo.gif
Binary files differ
diff --git a/Documentation/logo.svg b/Documentation/logo.svg
deleted file mode 100644
index cb9e4851d8c3..000000000000
--- a/Documentation/logo.svg
+++ /dev/null
@@ -1,2911 +0,0 @@
1<?xml version="1.0" encoding="UTF-8" standalone="no"?>
2<!-- Created with Inkscape (http://www.inkscape.org/) -->
3<svg
4 xmlns:dc="http://purl.org/dc/elements/1.1/"
5 xmlns:cc="http://creativecommons.org/ns#"
6 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
7 xmlns:svg="http://www.w3.org/2000/svg"
8 xmlns="http://www.w3.org/2000/svg"
9 xmlns:xlink="http://www.w3.org/1999/xlink"
10 xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
11 xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
12 width="1771.6534"
13 height="1417.3228"
14 id="svg2"
15 sodipodi:version="0.32"
16 inkscape:version="0.46"
17 sodipodi:docname="tuz.svg"
18 inkscape:output_extension="org.inkscape.output.svg.inkscape"
19 version="1.0"
20 style="display:inline;enable-background:new"
21 inkscape:export-filename="/home/cheeseness/Documents/LCA09/mascot/tuz_final.png"
22 inkscape:export-xdpi="100.03588"
23 inkscape:export-ydpi="100.03588">
24 <sodipodi:namedview
25 id="base"
26 pagecolor="#ffffff"
27 bordercolor="#666666"
28 borderopacity="1.0"
29 gridtolerance="10000"
30 guidetolerance="10"
31 objecttolerance="10"
32 inkscape:pageopacity="0.0"
33 inkscape:pageshadow="2"
34 inkscape:zoom="0.25"
35 inkscape:cx="-174.7931"
36 inkscape:cy="784.26325"
37 inkscape:document-units="px"
38 inkscape:current-layer="svg2"
39 showgrid="false"
40 inkscape:window-width="1280"
41 inkscape:window-height="823"
42 inkscape:window-x="-4"
43 inkscape:window-y="25"
44 showguides="true"
45 inkscape:guide-bbox="true"
46 units="mm" />
47 <defs
48 id="defs4">
49 <filter
50 inkscape:collect="always"
51 x="-0.084654994"
52 width="1.16931"
53 y="-0.36592469"
54 height="1.7318494"
55 id="filter11361">
56 <feGaussianBlur
57 inkscape:collect="always"
58 stdDeviation="4.5740586"
59 id="feGaussianBlur11363" />
60 </filter>
61 <inkscape:perspective
62 sodipodi:type="inkscape:persp3d"
63 inkscape:vp_x="0 : 564.0976 : 1"
64 inkscape:vp_y="0 : 1000 : 0"
65 inkscape:vp_z="1445.8591 : 564.0976 : 1"
66 inkscape:persp3d-origin="722.92957 : 376.06506 : 1"
67 id="perspective8145" />
68 <linearGradient
69 id="linearGradient7622">
70 <stop
71 style="stop-color:#ffffff;stop-opacity:1;"
72 offset="0"
73 id="stop7624" />
74 <stop
75 style="stop-color:#ffffff;stop-opacity:0;"
76 offset="1"
77 id="stop7626" />
78 </linearGradient>
79 <linearGradient
80 id="linearGradient4113">
81 <stop
82 style="stop-color:#000000;stop-opacity:0;"
83 offset="0"
84 id="stop4115" />
85 <stop
86 style="stop-color:#000000;stop-opacity:1;"
87 offset="1"
88 id="stop4117" />
89 </linearGradient>
90 <linearGradient
91 inkscape:collect="always"
92 id="linearGradient3660">
93 <stop
94 style="stop-color:#ffffff;stop-opacity:1;"
95 offset="0"
96 id="stop3662" />
97 <stop
98 style="stop-color:#ffffff;stop-opacity:0;"
99 offset="1"
100 id="stop3664" />
101 </linearGradient>
102 <linearGradient
103 id="linearGradient3627">
104 <stop
105 style="stop-color:#ffffff;stop-opacity:1;"
106 offset="0"
107 id="stop3629" />
108 <stop
109 style="stop-color:#000000;stop-opacity:1;"
110 offset="1"
111 id="stop3631" />
112 </linearGradient>
113 <linearGradient
114 id="linearGradient2843">
115 <stop
116 id="stop2845"
117 offset="0"
118 style="stop-color:#000000;stop-opacity:1;" />
119 <stop
120 style="stop-color:#000000;stop-opacity:1;"
121 offset="0.02188784"
122 id="stop2847" />
123 <stop
124 style="stop-color:#000000;stop-opacity:1;"
125 offset="0.75866222"
126 id="stop2849" />
127 <stop
128 id="stop2851"
129 offset="0.88508981"
130 style="stop-color:#232323;stop-opacity:1;" />
131 <stop
132 id="stop2853"
133 offset="1"
134 style="stop-color:#595959;stop-opacity:1;" />
135 </linearGradient>
136 <linearGradient
137 inkscape:collect="always"
138 id="linearGradient8964">
139 <stop
140 style="stop-color:#1a1a1a;stop-opacity:1;"
141 offset="0"
142 id="stop8966" />
143 <stop
144 style="stop-color:#1a1a1a;stop-opacity:0;"
145 offset="1"
146 id="stop8968" />
147 </linearGradient>
148 <linearGradient
149 id="linearGradient8952">
150 <stop
151 style="stop-color:#0a0c0c;stop-opacity:1;"
152 offset="0"
153 id="stop8954" />
154 <stop
155 style="stop-color:#1f2727;stop-opacity:0;"
156 offset="1"
157 id="stop8956" />
158 </linearGradient>
159 <linearGradient
160 id="linearGradient8430">
161 <stop
162 style="stop-color:#1e2323;stop-opacity:1;"
163 offset="0"
164 id="stop8432" />
165 <stop
166 id="stop8438"
167 offset="0.55992389"
168 style="stop-color:#181d1d;stop-opacity:1;" />
169 <stop
170 style="stop-color:#000000;stop-opacity:1;"
171 offset="1"
172 id="stop8434" />
173 </linearGradient>
174 <linearGradient
175 id="linearGradient8398">
176 <stop
177 style="stop-color:#283131;stop-opacity:0;"
178 offset="0"
179 id="stop8400" />
180 <stop
181 id="stop8402"
182 offset="0.5125587"
183 style="stop-color:#1e2424;stop-opacity:0;" />
184 <stop
185 style="stop-color:#000000;stop-opacity:1;"
186 offset="1"
187 id="stop8404" />
188 </linearGradient>
189 <linearGradient
190 inkscape:collect="always"
191 id="linearGradient4870">
192 <stop
193 style="stop-color:#c7bd80;stop-opacity:1;"
194 offset="0"
195 id="stop4872" />
196 <stop
197 style="stop-color:#c7bd80;stop-opacity:0;"
198 offset="1"
199 id="stop4874" />
200 </linearGradient>
201 <linearGradient
202 inkscape:collect="always"
203 id="linearGradient4862">
204 <stop
205 style="stop-color:#e2e2e2;stop-opacity:1;"
206 offset="0"
207 id="stop4864" />
208 <stop
209 style="stop-color:#e2e2e2;stop-opacity:0;"
210 offset="1"
211 id="stop4866" />
212 </linearGradient>
213 <linearGradient
214 id="linearGradient4478">
215 <stop
216 style="stop-color:#f9eed3;stop-opacity:1;"
217 offset="0"
218 id="stop4480" />
219 <stop
220 style="stop-color:#000000;stop-opacity:0;"
221 offset="1"
222 id="stop4482" />
223 </linearGradient>
224 <linearGradient
225 id="linearGradient4106">
226 <stop
227 style="stop-color:#d9e002;stop-opacity:1;"
228 offset="0"
229 id="stop4108" />
230 <stop
231 id="stop4114"
232 offset="0.5"
233 style="stop-color:#a9ae01;stop-opacity:1;" />
234 <stop
235 style="stop-color:#717501;stop-opacity:1;"
236 offset="1"
237 id="stop4110" />
238 </linearGradient>
239 <linearGradient
240 id="linearGradient4084">
241 <stop
242 style="stop-color:#7d7d00;stop-opacity:1;"
243 offset="0"
244 id="stop4086" />
245 <stop
246 id="stop4088"
247 offset="0.3636601"
248 style="stop-color:#c6c700;stop-opacity:1;" />
249 <stop
250 style="stop-color:#f6f800;stop-opacity:1;"
251 offset="1"
252 id="stop4090" />
253 </linearGradient>
254 <linearGradient
255 id="linearGradient4041">
256 <stop
257 id="stop4043"
258 offset="0"
259 style="stop-color:#ffff00;stop-opacity:1;" />
260 <stop
261 id="stop4045"
262 offset="1"
263 style="stop-color:#ffff00;stop-opacity:0;" />
264 </linearGradient>
265 <linearGradient
266 id="linearGradient4025">
267 <stop
268 style="stop-color:#ffffff;stop-opacity:1;"
269 offset="0"
270 id="stop4027" />
271 <stop
272 style="stop-color:#ffffff;stop-opacity:0;"
273 offset="1"
274 id="stop4031" />
275 </linearGradient>
276 <linearGradient
277 id="linearGradient4013">
278 <stop
279 style="stop-color:#ffff00;stop-opacity:1;"
280 offset="0"
281 id="stop4015" />
282 <stop
283 style="stop-color:#b2b200;stop-opacity:1;"
284 offset="1"
285 id="stop4017" />
286 </linearGradient>
287 <linearGradient
288 id="linearGradient3985">
289 <stop
290 style="stop-color:#000000;stop-opacity:1;"
291 offset="0"
292 id="stop3987" />
293 <stop
294 style="stop-color:#1d1d1d;stop-opacity:1;"
295 offset="1"
296 id="stop3989" />
297 </linearGradient>
298 <linearGradient
299 id="linearGradient3961">
300 <stop
301 style="stop-color:#283131;stop-opacity:0;"
302 offset="0"
303 id="stop3963" />
304 <stop
305 id="stop3965"
306 offset="0.5"
307 style="stop-color:#1e2424;stop-opacity:1;" />
308 <stop
309 style="stop-color:#000000;stop-opacity:1;"
310 offset="1"
311 id="stop3967" />
312 </linearGradient>
313 <linearGradient
314 id="linearGradient3951">
315 <stop
316 id="stop3953"
317 offset="0"
318 style="stop-color:#344040;stop-opacity:1;" />
319 <stop
320 style="stop-color:#222929;stop-opacity:1;"
321 offset="0.5"
322 id="stop3955" />
323 <stop
324 id="stop3957"
325 offset="1"
326 style="stop-color:#000000;stop-opacity:1;" />
327 </linearGradient>
328 <linearGradient
329 id="linearGradient3909">
330 <stop
331 style="stop-color:#283131;stop-opacity:1;"
332 offset="0"
333 id="stop3911" />
334 <stop
335 id="stop3917"
336 offset="0.5"
337 style="stop-color:#1e2424;stop-opacity:1;" />
338 <stop
339 style="stop-color:#000000;stop-opacity:1;"
340 offset="1"
341 id="stop3913" />
342 </linearGradient>
343 <linearGradient
344 id="linearGradient3537">
345 <stop
346 style="stop-color:#ada469;stop-opacity:1;"
347 offset="0"
348 id="stop3539" />
349 <stop
350 id="stop3545"
351 offset="0.81132078"
352 style="stop-color:#ada469;stop-opacity:1;" />
353 <stop
354 style="stop-color:#ffffff;stop-opacity:1;"
355 offset="1"
356 id="stop3541" />
357 </linearGradient>
358 <linearGradient
359 id="linearGradient3317">
360 <stop
361 style="stop-color:#cfc690;stop-opacity:1"
362 offset="0"
363 id="stop3319" />
364 <stop
365 id="stop3321"
366 offset="0.21161865"
367 style="stop-color:#afa775;stop-opacity:1;" />
368 <stop
369 id="stop3323"
370 offset="0.53408515"
371 style="stop-color:#615c3a;stop-opacity:1;" />
372 <stop
373 style="stop-color:#000000;stop-opacity:1;"
374 offset="0.76504093"
375 id="stop3325" />
376 <stop
377 id="stop3327"
378 offset="1"
379 style="stop-color:#403518;stop-opacity:1;" />
380 </linearGradient>
381 <linearGradient
382 id="linearGradient3239">
383 <stop
384 id="stop3251"
385 offset="0"
386 style="stop-color:#cfc690;stop-opacity:1;" />
387 <stop
388 style="stop-color:#afa775;stop-opacity:1;"
389 offset="0.21161865"
390 id="stop3267" />
391 <stop
392 style="stop-color:#615c3a;stop-opacity:1;"
393 offset="0.53408515"
394 id="stop3261" />
395 <stop
396 id="stop3265"
397 offset="0.76504093"
398 style="stop-color:#000000;stop-opacity:1;" />
399 <stop
400 style="stop-color:#403518;stop-opacity:1;"
401 offset="1"
402 id="stop3243" />
403 </linearGradient>
404 <radialGradient
405 inkscape:collect="always"
406 xlink:href="#linearGradient3239"
407 id="radialGradient3281"
408 gradientUnits="userSpaceOnUse"
409 gradientTransform="matrix(1.5480423,1.7414304,-1.9683515,1.7497638,-1130.5586,-1872.5121)"
410 spreadMethod="pad"
411 cx="806.52582"
412 cy="212.68117"
413 fx="806.52582"
414 fy="212.68117"
415 r="48.363216" />
416 <radialGradient
417 inkscape:collect="always"
418 xlink:href="#linearGradient3317"
419 id="radialGradient3315"
420 cx="543.6698"
421 cy="147.3131"
422 fx="543.6698"
423 fy="147.3131"
424 r="47.863216"
425 gradientTransform="matrix(2.1382256,0,0,2.3382884,-77.03847,-101.68704)"
426 gradientUnits="userSpaceOnUse" />
427 <radialGradient
428 inkscape:collect="always"
429 xlink:href="#linearGradient3537"
430 id="radialGradient3543"
431 cx="385"
432 cy="237.00504"
433 fx="385"
434 fy="237.00504"
435 r="86.928574"
436 gradientTransform="matrix(1,0,0,0.8562038,0,34.080427)"
437 gradientUnits="userSpaceOnUse" />
438 <radialGradient
439 inkscape:collect="always"
440 xlink:href="#linearGradient3909"
441 id="radialGradient3915"
442 cx="418.30365"
443 cy="342.47794"
444 fx="418.30365"
445 fy="342.47794"
446 r="131.4509"
447 gradientTransform="matrix(1.3957347,0.6211056,-0.4244067,0.9537174,-15.061913,-227.96711)"
448 gradientUnits="userSpaceOnUse" />
449 <radialGradient
450 inkscape:collect="always"
451 xlink:href="#linearGradient3951"
452 id="radialGradient3933"
453 cx="397.16388"
454 cy="336.95245"
455 fx="397.16388"
456 fy="336.95245"
457 r="36.75"
458 gradientUnits="userSpaceOnUse"
459 gradientTransform="matrix(1.9449972,2.4894837e-7,-2.4894833e-7,1.9449969,-375.31868,-318.41912)" />
460 <linearGradient
461 inkscape:collect="always"
462 xlink:href="#linearGradient3961"
463 id="linearGradient3959"
464 x1="398.21429"
465 y1="343.52289"
466 x2="379.28571"
467 y2="265.30862"
468 gradientUnits="userSpaceOnUse"
469 gradientTransform="translate(450.03125,73.843964)" />
470 <filter
471 inkscape:collect="always"
472 id="filter3981"
473 x="-0.30000001"
474 width="1.6"
475 y="-0.30000001"
476 height="1.6">
477 <feGaussianBlur
478 inkscape:collect="always"
479 stdDeviation="2"
480 id="feGaussianBlur3983" />
481 </filter>
482 <radialGradient
483 inkscape:collect="always"
484 xlink:href="#linearGradient3985"
485 id="radialGradient3991"
486 cx="402.48898"
487 cy="317.23578"
488 fx="402.48898"
489 fy="317.23578"
490 r="23.714285"
491 gradientUnits="userSpaceOnUse"
492 gradientTransform="matrix(4.3776616,0,0,4.3776616,-1358.3025,-1070.7357)" />
493 <clipPath
494 clipPathUnits="userSpaceOnUse"
495 id="clipPath3999">
496 <path
497 style="opacity:1;fill:#f5ff04;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline"
498 d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 339.75316,358.17469 330.35714,331.29075 C 320.9229,304.29747 295.38973,272.16627 263.92857,261.6479 C 232.8953,251.27258 198.91081,256.79953 179.64286,267.36218 z"
499 id="path4001"
500 sodipodi:nodetypes="czzczzzzc" />
501 </clipPath>
502 <radialGradient
503 inkscape:collect="always"
504 xlink:href="#linearGradient4013"
505 id="radialGradient4056"
506 gradientUnits="userSpaceOnUse"
507 gradientTransform="matrix(1.1323239,0.7659488,-1.4550286,2.1510098,588.75376,-711.79716)"
508 cx="228.81355"
509 cy="440.26971"
510 fx="228.81355"
511 fy="440.26971"
512 r="119.17509" />
513 <radialGradient
514 inkscape:collect="always"
515 xlink:href="#linearGradient4041"
516 id="radialGradient4060"
517 gradientUnits="userSpaceOnUse"
518 gradientTransform="matrix(5.911206e-2,2.6869855,-0.7234268,1.5914947e-2,408.72779,-424.56452)"
519 cx="275.4422"
520 cy="335.34866"
521 fx="275.4422"
522 fy="335.34866"
523 r="36.75" />
524 <radialGradient
525 inkscape:collect="always"
526 xlink:href="#linearGradient4025"
527 id="radialGradient4062"
528 gradientUnits="userSpaceOnUse"
529 gradientTransform="matrix(5.911206e-2,2.6869855,-0.7234268,1.5914947e-2,408.72779,-424.56452)"
530 cx="275.4422"
531 cy="335.34866"
532 fx="275.4422"
533 fy="335.34866"
534 r="36.75" />
535 <linearGradient
536 inkscape:collect="always"
537 xlink:href="#linearGradient4084"
538 id="linearGradient4082"
539 gradientUnits="userSpaceOnUse"
540 x1="182.35046"
541 y1="256.11136"
542 x2="145.53348"
543 y2="542.20502" />
544 <clipPath
545 clipPathUnits="userSpaceOnUse"
546 id="clipPath4100">
547 <path
548 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.9000755px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
549 d="M 265.93541,126.68393 L 247.1682,295.54701 L 421.27363,222.42633 L 483.22803,311.08516 L 541.11243,279.09486 L 503.57801,99.035183 L 265.93541,126.68393 z"
550 id="path4102"
551 sodipodi:nodetypes="ccccccc" />
552 </clipPath>
553 <radialGradient
554 inkscape:collect="always"
555 xlink:href="#linearGradient4106"
556 id="radialGradient4112"
557 cx="250.22678"
558 cy="475.09763"
559 fx="250.22678"
560 fy="475.09763"
561 r="95.98877"
562 gradientTransform="matrix(1.2259004,-0.7077739,0.1413989,0.2449102,322.22326,608.91815)"
563 gradientUnits="userSpaceOnUse" />
564 <linearGradient
565 inkscape:collect="always"
566 xlink:href="#linearGradient4478"
567 id="linearGradient4484"
568 x1="412.08926"
569 y1="404.91574"
570 x2="417.375"
571 y2="401.82648"
572 gradientUnits="userSpaceOnUse" />
573 <linearGradient
574 inkscape:collect="always"
575 xlink:href="#linearGradient4478"
576 id="linearGradient4486"
577 x1="411.91071"
578 y1="404.91577"
579 x2="417.375"
580 y2="401.82648"
581 gradientUnits="userSpaceOnUse" />
582 <linearGradient
583 inkscape:collect="always"
584 xlink:href="#linearGradient4478"
585 id="linearGradient4488"
586 x1="411.91071"
587 y1="405.54077"
588 x2="417.375"
589 y2="401.82648"
590 gradientUnits="userSpaceOnUse" />
591 <linearGradient
592 inkscape:collect="always"
593 xlink:href="#linearGradient4478"
594 id="linearGradient4490"
595 x1="412.08926"
596 y1="405.54077"
597 x2="417.375"
598 y2="401.82648"
599 gradientUnits="userSpaceOnUse" />
600 <linearGradient
601 inkscape:collect="always"
602 xlink:href="#linearGradient4478"
603 id="linearGradient4492"
604 x1="411.73212"
605 y1="405.54077"
606 x2="417.375"
607 y2="401.82648"
608 gradientUnits="userSpaceOnUse" />
609 <radialGradient
610 inkscape:collect="always"
611 xlink:href="#linearGradient4862"
612 id="radialGradient4868"
613 cx="429.56738"
614 cy="377.42877"
615 fx="429.56738"
616 fy="377.42877"
617 r="72.079735"
618 gradientTransform="matrix(1,0,0,0.618034,0,144.16496)"
619 gradientUnits="userSpaceOnUse" />
620 <radialGradient
621 inkscape:collect="always"
622 xlink:href="#linearGradient4870"
623 id="radialGradient4876"
624 cx="437.6991"
625 cy="391.21735"
626 fx="437.6991"
627 fy="391.21735"
628 r="36.611931"
629 gradientTransform="matrix(1,0,0,0.618034,0,149.43174)"
630 gradientUnits="userSpaceOnUse" />
631 <radialGradient
632 inkscape:collect="always"
633 xlink:href="#linearGradient4013"
634 id="radialGradient3585"
635 gradientUnits="userSpaceOnUse"
636 gradientTransform="matrix(1.1323239,0.7659488,-1.4550286,2.1510098,588.75376,-711.79716)"
637 cx="228.81355"
638 cy="440.26971"
639 fx="228.81355"
640 fy="440.26971"
641 r="119.17509" />
642 <linearGradient
643 inkscape:collect="always"
644 xlink:href="#linearGradient4084"
645 id="linearGradient3587"
646 gradientUnits="userSpaceOnUse"
647 x1="182.35046"
648 y1="256.11136"
649 x2="145.53348"
650 y2="542.20502" />
651 <radialGradient
652 inkscape:collect="always"
653 xlink:href="#linearGradient3317"
654 id="radialGradient8410"
655 gradientUnits="userSpaceOnUse"
656 gradientTransform="matrix(1.0036478,-1.0345492e-7,1.7124628e-7,1.6613125,-753.99632,-302.76972)"
657 cx="317.78754"
658 cy="129.65378"
659 fx="317.78754"
660 fy="129.65378"
661 r="47.863216" />
662 <radialGradient
663 inkscape:collect="always"
664 xlink:href="#linearGradient8398"
665 id="radialGradient8412"
666 gradientUnits="userSpaceOnUse"
667 gradientTransform="matrix(2.0747661,-0.1577957,0.2382425,3.1325183,-1144.2358,-272.29325)"
668 cx="325.30847"
669 cy="80.909554"
670 fx="325.30847"
671 fy="80.909554"
672 r="26.937988" />
673 <clipPath
674 clipPathUnits="userSpaceOnUse"
675 id="clipPath8514">
676 <path
677 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
678 d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z"
679 id="path8516"
680 sodipodi:nodetypes="cscccccccccccc" />
681 </clipPath>
682 <clipPath
683 clipPathUnits="userSpaceOnUse"
684 id="clipPath8604">
685 <path
686 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
687 d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z"
688 id="path8606"
689 sodipodi:nodetypes="cscccccccccccc" />
690 </clipPath>
691 <clipPath
692 clipPathUnits="userSpaceOnUse"
693 id="clipPath8610">
694 <path
695 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
696 d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z"
697 id="path8612"
698 sodipodi:nodetypes="cscccccccccccc" />
699 </clipPath>
700 <clipPath
701 clipPathUnits="userSpaceOnUse"
702 id="clipPath8616">
703 <path
704 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
705 d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z"
706 id="path8618"
707 sodipodi:nodetypes="cscccccccccccc" />
708 </clipPath>
709 <clipPath
710 clipPathUnits="userSpaceOnUse"
711 id="clipPath8622">
712 <path
713 style="opacity:1;fill:#202020;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
714 d="M 821.64329,477.88997 C 821.64329,477.88997 844.26276,471.38316 857.38604,472.01724 C 870.50932,472.65133 888.02762,473.95586 901.09489,484.20343 C 914.16216,494.45099 926.16263,511.3435 935.20728,542.57308 C 944.25193,573.80266 936.9056,641.82509 929.03125,685.92043 C 921.1569,730.01577 900.76615,792.03341 884.03125,825.92043 C 867.29635,859.80745 834.23354,903.41563 823.46182,915.79659 C 812.0976,928.85856 767.25593,952.22276 744.03125,958.06326 C 749.33455,947.45666 792.93101,907.47442 779.03125,897.349 C 765.01228,887.13674 733.27116,943.33136 694.7381,926.38217 C 716.12041,913.25005 736.5175,875.19611 728.77871,859.78772 C 720.93846,844.17733 698.07378,908.54529 635.24317,896.8006 C 665.29521,869.27394 690.65023,825.89659 676.50587,813.8209 C 662.09071,801.51403 616.04412,868.11405 616.04412,868.11405 C 616.04412,868.11405 613.22222,826.41287 629.81732,799.50673 C 646.45667,772.52886 709.47029,717.89146 729.37045,687.80331 C 749.2706,657.71517 762.98301,621.79429 771.50587,595.28537 C 780.02873,568.77645 787.30681,518.18583 787.30681,518.18583"
715 id="path8624"
716 sodipodi:nodetypes="czzzzzzczczczczzzc" />
717 </clipPath>
718 <clipPath
719 clipPathUnits="userSpaceOnUse"
720 id="clipPath8642">
721 <path
722 style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
723 d="M 366.88839,504.13471 C 366.88839,504.13471 337.33433,544.70776 319.03125,578.42042 C 300.72816,612.13309 260.41016,704.77736 248.67411,749.49185 C 236.91471,794.29529 186.17411,873.06329 186.17411,873.06329 L 262.24554,891.27757 C 262.24554,891.27757 274.05266,878.45422 293.31696,845.20614 C 312.58126,811.95806 353.67411,706.63471 353.67411,706.63471 L 366.88839,504.13471 z"
724 id="path8644"
725 sodipodi:nodetypes="czzcczcc" />
726 </clipPath>
727 <clipPath
728 clipPathUnits="userSpaceOnUse"
729 id="clipPath8658">
730 <path
731 style="opacity:1;fill:#0b0b0b;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
732 d="M 569.03125,1018.7776 C 564.74554,1019.4919 541.4031,1022.3957 511.17411,1028.7776 C 480.94512,1035.1595 411.39918,1054.7395 368.31696,1064.4919 C 325.23474,1074.2443 251.05253,1099.3079 211.40434,1091.7573 C 171.75616,1084.2067 121.88839,1027.349 121.88839,1027.349 L 126.17411,933.06329 C 126.17411,933.06329 212.05962,916.86235 238.31696,899.49186 C 264.57431,882.12137 283.89934,849.82588 297.60268,828.06329 C 311.30602,806.3007 330.45982,756.63471 330.45982,756.63471 L 569.03125,1018.7776 z"
733 id="path8660"
734 sodipodi:nodetypes="czzzcczzcc" />
735 </clipPath>
736 <filter
737 inkscape:collect="always"
738 id="filter8802"
739 x="-0.35311759"
740 width="1.7062352"
741 y="-0.1817714"
742 height="1.3635428">
743 <feGaussianBlur
744 inkscape:collect="always"
745 stdDeviation="48.038491"
746 id="feGaussianBlur8804" />
747 </filter>
748 <filter
749 inkscape:collect="always"
750 id="filter8806"
751 x="-0.61142862"
752 width="2.2228572"
753 y="-0.14930232"
754 height="1.2986046">
755 <feGaussianBlur
756 inkscape:collect="always"
757 stdDeviation="37.830213"
758 id="feGaussianBlur8808" />
759 </filter>
760 <filter
761 inkscape:collect="always"
762 id="filter8810"
763 x="-0.23519406"
764 width="1.4703881"
765 y="-0.24500646"
766 height="1.4900129">
767 <feGaussianBlur
768 inkscape:collect="always"
769 stdDeviation="58.328041"
770 id="feGaussianBlur8812" />
771 </filter>
772 <filter
773 inkscape:collect="always"
774 id="filter8814"
775 x="-0.20466694"
776 width="1.4093339"
777 y="-0.29007819"
778 height="1.5801564">
779 <feGaussianBlur
780 inkscape:collect="always"
781 stdDeviation="22.300169"
782 id="feGaussianBlur8816" />
783 </filter>
784 <filter
785 inkscape:collect="always"
786 id="filter8818"
787 x="-0.34381232"
788 width="1.6876246"
789 y="-0.18433961"
790 height="1.3686792">
791 <feGaussianBlur
792 inkscape:collect="always"
793 stdDeviation="34.542167"
794 id="feGaussianBlur8820" />
795 </filter>
796 <filter
797 inkscape:collect="always"
798 id="filter8822"
799 x="-0.2742857"
800 width="1.5485713"
801 y="-0.21333334"
802 height="1.4266667">
803 <feGaussianBlur
804 inkscape:collect="always"
805 stdDeviation="11.313708"
806 id="feGaussianBlur8824" />
807 </filter>
808 <filter
809 inkscape:collect="always"
810 id="filter8826"
811 x="-0.25894088"
812 width="1.5178818"
813 y="-0.2236412"
814 height="1.4472824">
815 <feGaussianBlur
816 inkscape:collect="always"
817 stdDeviation="19.631544"
818 id="feGaussianBlur8828" />
819 </filter>
820 <filter
821 inkscape:collect="always"
822 id="filter8856"
823 x="-0.3253231"
824 width="1.6506462"
825 y="-0.19013336"
826 height="1.3802667">
827 <feGaussianBlur
828 inkscape:collect="always"
829 stdDeviation="28.712591"
830 id="feGaussianBlur8858" />
831 </filter>
832 <filter
833 inkscape:collect="always"
834 id="filter8860"
835 x="-0.38093024"
836 width="1.7618605"
837 y="-0.17518716"
838 height="1.3503743">
839 <feGaussianBlur
840 inkscape:collect="always"
841 stdDeviation="19.304015"
842 id="feGaussianBlur8862" />
843 </filter>
844 <filter
845 inkscape:collect="always"
846 id="filter8888"
847 x="-0.2112188"
848 width="1.4224375"
849 y="-0.16808605"
850 height="1.3361721">
851 <feGaussianBlur
852 inkscape:collect="always"
853 stdDeviation="8.3693583"
854 id="feGaussianBlur8890" />
855 </filter>
856 <filter
857 inkscape:collect="always"
858 id="filter8892"
859 x="-0.18692794"
860 width="1.3738559"
861 y="-0.23646873"
862 height="1.4729375">
863 <feGaussianBlur
864 inkscape:collect="always"
865 stdDeviation="31.21228"
866 id="feGaussianBlur8894" />
867 </filter>
868 <clipPath
869 clipPathUnits="userSpaceOnUse"
870 id="clipPath8906">
871 <path
872 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
873 d="M 352.24553,211.99185 C 348.4411,186.72762 335.43581,161.35383 335.08873,136.46662 C 334.90247,123.1111 338.36158,109.89571 348.84426,96.912574 C 385.19128,31.616739 465.78517,12.217889 534.77892,5.447147 C 621.70131,-5.569654 719.69159,23.387219 768.15026,100.84843 C 822.27428,176.58173 824.82502,273.38755 848.7623,360.37638 C 878.20009,487.50398 903.54144,616.59052 909.15454,747.22673 C 906.09106,825.40858 900.7282,912.41088 848.65133,975.36086 C 800.62479,1025.7183 725.86486,1025.4139 661.58145,1034.3632 C 571.02606,1039.0182 477.22992,1018.2174 399.79755,970.16496 C 335.02191,932.22495 304.06736,856.68633 302.51815,784.14538 C 294.12898,704.27022 328.90967,630.33687 354.13855,556.98577 C 361.60916,474.2247 363.55141,390.73802 363.79189,307.60093 C 362.95507,275.40549 356.70236,243.7836 352.24553,211.99185 z"
874 id="path8908"
875 sodipodi:nodetypes="cscccccccccccc" />
876 </clipPath>
877 <filter
878 inkscape:collect="always"
879 id="filter8940"
880 x="-0.25152978"
881 width="1.5030596"
882 y="-0.053035267"
883 height="1.1060705">
884 <feGaussianBlur
885 inkscape:collect="always"
886 stdDeviation="13.024603"
887 id="feGaussianBlur8942" />
888 </filter>
889 <linearGradient
890 inkscape:collect="always"
891 xlink:href="#linearGradient8952"
892 id="linearGradient8958"
893 x1="609.31244"
894 y1="239.46866"
895 x2="560.83142"
896 y2="262.86206"
897 gradientUnits="userSpaceOnUse"
898 gradientTransform="translate(450.03125,73.843964)" />
899 <linearGradient
900 inkscape:collect="always"
901 xlink:href="#linearGradient8964"
902 id="linearGradient8970"
903 x1="603.84064"
904 y1="627.85303"
905 x2="616.24396"
906 y2="585.42664"
907 gradientUnits="userSpaceOnUse"
908 gradientTransform="translate(450.03125,73.843964)" />
909 <filter
910 inkscape:collect="always"
911 id="filter9020"
912 x="-0.32861114"
913 width="1.6572223"
914 y="-0.182"
915 height="1.364">
916 <feGaussianBlur
917 inkscape:collect="always"
918 stdDeviation="20.912684"
919 id="feGaussianBlur9022" />
920 </filter>
921 <filter
922 inkscape:collect="always"
923 id="filter9024"
924 x="-0.55453134"
925 width="2.1090627"
926 y="-0.51434779"
927 height="2.0286956">
928 <feGaussianBlur
929 inkscape:collect="always"
930 stdDeviation="20.912684"
931 id="feGaussianBlur9026" />
932 </filter>
933 <filter
934 inkscape:collect="always"
935 id="filter9044"
936 x="-0.32631579"
937 width="1.6526316"
938 y="-0.84545463"
939 height="2.6909094">
940 <feGaussianBlur
941 inkscape:collect="always"
942 stdDeviation="21.92031"
943 id="feGaussianBlur9046" />
944 </filter>
945 <filter
946 inkscape:collect="always"
947 id="filter9048"
948 x="-0.40879121"
949 width="1.8175824"
950 y="-0.71538466"
951 height="2.4307692">
952 <feGaussianBlur
953 inkscape:collect="always"
954 stdDeviation="21.92031"
955 id="feGaussianBlur9050" />
956 </filter>
957 <filter
958 inkscape:collect="always"
959 id="filter3587"
960 x="-0.1">
961 <feGaussianBlur
962 inkscape:collect="always"
963 stdDeviation="8.881432"
964 id="feGaussianBlur3589" />
965 </filter>
966 <clipPath
967 clipPathUnits="userSpaceOnUse"
968 id="clipPath3602">
969 <path
970 sodipodi:nodetypes="czzzzzzczczczczzzc"
971 id="path3604"
972 d="M 647.61204,540.04601 C 647.61204,540.04601 670.23151,533.5392 683.35479,534.17328 C 696.47807,534.80737 713.99637,536.1119 727.06364,546.35947 C 740.13091,556.60703 752.13138,573.49954 761.17603,604.72912 C 770.22068,635.9587 762.87435,703.98113 755,748.07647 C 747.12565,792.17181 726.7349,854.18945 710,888.07647 C 693.2651,921.96349 660.20229,965.57167 649.43057,977.95263 C 638.06635,991.0146 593.22468,1014.3788 570,1020.2193 C 575.3033,1009.6127 618.89976,969.63046 605,959.50504 C 590.98103,949.29278 559.23991,1005.4874 520.70685,988.53821 C 542.08916,975.40609 562.48625,937.35215 554.74746,921.94376 C 546.90721,906.33337 524.04253,970.70133 461.21192,958.95664 C 491.26396,931.42998 516.61898,888.05263 502.47462,875.97694 C 488.05946,863.67007 442.01287,930.27009 442.01287,930.27009 C 442.01287,930.27009 439.19097,888.56891 455.78607,861.66277 C 472.42542,834.6849 535.43904,780.0475 555.3392,749.95935 C 575.23935,719.87121 588.95176,683.95033 597.47462,657.44141 C 605.99748,630.93249 613.27556,580.34187 613.27556,580.34187"
973 style="opacity:1;fill:#202020;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
974 </clipPath>
975 <filter
976 inkscape:collect="always"
977 id="filter4120"
978 x="-0.2770822"
979 width="1.5541644"
980 y="-0.32482043"
981 height="1.6496409">
982 <feGaussianBlur
983 inkscape:collect="always"
984 stdDeviation="19.956289"
985 id="feGaussianBlur4122" />
986 </filter>
987 <clipPath
988 clipPathUnits="userSpaceOnUse"
989 id="clipPath3631">
990 <path
991 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
992 d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z"
993 id="path3633"
994 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
995 </clipPath>
996 <clipPath
997 clipPathUnits="userSpaceOnUse"
998 id="clipPath3665">
999 <path
1000 sodipodi:nodetypes="czzcczcc"
1001 id="path3667"
1002 d="M 366.88839,504.13471 C 366.88839,504.13471 337.33433,544.70776 319.03125,578.42042 C 300.72816,612.13309 260.41016,704.77736 248.67411,749.49185 C 236.91471,794.29529 186.17411,873.06329 186.17411,873.06329 L 262.24554,891.27757 C 262.24554,891.27757 274.05266,878.45422 293.31696,845.20614 C 312.58126,811.95806 353.67411,706.63471 353.67411,706.63471 L 366.88839,504.13471 z"
1003 style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1004 </clipPath>
1005 <clipPath
1006 clipPathUnits="userSpaceOnUse"
1007 id="clipPath3677">
1008 <path
1009 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1010 d="M 586.13271,997.98981 C 592.92681,1016.8926 596.6264,1031.2895 598.02301,1049.2017 C 599.41963,1067.114 594.24033,1101.0025 595.12255,1119.8578 C 596.0043,1138.703 603.25624,1159.9568 622.46718,1168.8267 C 641.88605,1177.7925 671.78642,1179.038 696.58702,1165.6811 C 721.38762,1152.3241 753.98804,1095.3556 767.56128,1068.3724 C 781.18513,1041.2885 806.32235,953.87507 812.22208,918.60385 C 818.12181,883.33264 814.77262,877.30308 807.6046,869.54836 C 810.2449,841.70821 806.10488,814.61293 820.71429,782.36218 C 790.46528,794.18788 783.332,822.52288 772.3954,847.86726 C 764.39449,796.93433 772.60456,776.59407 775.71429,746.6479 C 746.64953,761.42581 732.85278,793.76192 730.71429,839.50504 C 719.79034,838.20084 709.32285,835.07081 697.14286,838.79075 C 696.87882,792.76741 695.6793,761.90134 706.05349,724.58426 C 652.79802,745.61112 643.10621,831.17836 650,837.36218 C 639.11718,837.89718 628.62892,836.06488 617.14286,840.21933 C 617.78178,797.64798 616.8824,755.31072 587.14286,717.36218 C 587.14286,717.36218 556.1848,798.28452 555.71429,820.93361 C 555.24378,843.5827 565.16588,861.09949 565.16588,861.09949 C 565.16588,861.09949 556.59781,897.84 558.86729,919.33172 C 561.16314,941.07318 579.31019,979.00789 586.13271,997.98981 z"
1011 id="path3679"
1012 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
1013 </clipPath>
1014 <filter
1015 inkscape:collect="always"
1016 id="filter3898">
1017 <feGaussianBlur
1018 inkscape:collect="always"
1019 stdDeviation="10.892985"
1020 id="feGaussianBlur3900" />
1021 </filter>
1022 <filter
1023 inkscape:collect="always"
1024 id="filter4130"
1025 x="-0.49509686"
1026 width="1.9901937"
1027 y="-0.26708817"
1028 height="1.5341763">
1029 <feGaussianBlur
1030 inkscape:collect="always"
1031 stdDeviation="10.730622"
1032 id="feGaussianBlur4132" />
1033 </filter>
1034 <filter
1035 inkscape:collect="always"
1036 id="filter4141"
1037 x="-0.40611032"
1038 width="1.8122206"
1039 y="-0.30260596"
1040 height="1.6052119">
1041 <feGaussianBlur
1042 inkscape:collect="always"
1043 stdDeviation="9.8586086"
1044 id="feGaussianBlur4143" />
1045 </filter>
1046 <clipPath
1047 clipPathUnits="userSpaceOnUse"
1048 id="clipPath4177">
1049 <path
1050 sodipodi:nodetypes="czzzzzzcccccccccczczz"
1051 id="path4179"
1052 d="M 586.13271,997.98981 C 592.92681,1016.8926 596.6264,1031.2895 598.02301,1049.2017 C 599.41963,1067.114 594.24033,1101.0025 595.12255,1119.8578 C 596.0043,1138.703 603.25624,1159.9568 622.46718,1168.8267 C 641.88605,1177.7925 671.78642,1179.038 696.58702,1165.6811 C 721.38762,1152.3241 753.98804,1095.3556 767.56128,1068.3724 C 781.18513,1041.2885 806.32235,953.87507 812.22208,918.60385 C 818.12181,883.33264 814.77262,877.30308 807.6046,869.54836 C 810.2449,841.70821 806.10488,814.61293 820.71429,782.36218 C 790.46528,794.18788 783.332,822.52288 772.3954,847.86726 C 764.39449,796.93433 772.60456,776.59407 775.71429,746.6479 C 746.64953,761.42581 732.85278,793.76192 730.71429,839.50504 C 719.79034,838.20084 709.32285,835.07081 697.14286,838.79075 C 696.87882,792.76741 695.6793,761.90134 706.05349,724.58426 C 652.79802,745.61112 643.10621,831.17836 650,837.36218 C 639.11718,837.89718 628.62892,836.06488 617.14286,840.21933 C 617.78178,797.64798 616.8824,755.31072 587.14286,717.36218 C 587.14286,717.36218 556.1848,798.28452 555.71429,820.93361 C 555.24378,843.5827 565.16588,861.09949 565.16588,861.09949 C 565.16588,861.09949 556.59781,897.84 558.86729,919.33172 C 561.16314,941.07318 579.31019,979.00789 586.13271,997.98981 z"
1053 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1054 </clipPath>
1055 <filter
1056 inkscape:collect="always"
1057 id="filter4185">
1058 <feGaussianBlur
1059 inkscape:collect="always"
1060 stdDeviation="3.6164709"
1061 id="feGaussianBlur4187" />
1062 </filter>
1063 <filter
1064 inkscape:collect="always"
1065 id="filter4105">
1066 <feGaussianBlur
1067 inkscape:collect="always"
1068 stdDeviation="3.8640966"
1069 id="feGaussianBlur4107" />
1070 </filter>
1071 <clipPath
1072 clipPathUnits="userSpaceOnUse"
1073 id="clipPath2833">
1074 <path
1075 style="opacity:1;fill:#292929;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1076 d="M 569.03125,1018.7776 C 564.74554,1019.4919 541.4031,1022.3957 511.17411,1028.7776 C 480.94512,1035.1595 453.86016,1033.7437 375.38803,1046.1072 C 295.53625,1058.688 281.32367,1088.6495 267.26578,1093.1715 C 252.56564,1097.9001 121.88839,1027.349 121.88839,1027.349 L 126.17411,933.06329 C 126.17411,933.06329 212.05962,916.86235 238.31696,899.49186 C 264.57431,882.12137 283.89934,849.82588 297.60268,828.06329 C 311.30602,806.3007 330.45982,756.63471 330.45982,756.63471 L 569.03125,1018.7776 z"
1077 id="path2835"
1078 sodipodi:nodetypes="czzzcczzcc" />
1079 </clipPath>
1080 <linearGradient
1081 inkscape:collect="always"
1082 xlink:href="#linearGradient2843"
1083 id="linearGradient2841"
1084 gradientUnits="userSpaceOnUse"
1085 x1="347.89655"
1086 y1="1070.2124"
1087 x2="275.58191"
1088 y2="867.97992" />
1089 <linearGradient
1090 inkscape:collect="always"
1091 xlink:href="#linearGradient3627"
1092 id="linearGradient3688"
1093 gradientUnits="userSpaceOnUse"
1094 x1="699.32867"
1095 y1="269.76755"
1096 x2="698.97504"
1097 y2="346.1351" />
1098 <mask
1099 maskUnits="userSpaceOnUse"
1100 id="mask3684">
1101 <path
1102 sodipodi:type="arc"
1103 style="opacity:1;fill:url(#linearGradient3688);fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.43724918px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1104 id="path3686"
1105 sodipodi:cx="579.474"
1106 sodipodi:cy="260.57516"
1107 sodipodi:rx="192.6866"
1108 sodipodi:ry="164.04877"
1109 d="M 772.1606,260.57516 A 192.6866,164.04877 0 1 1 386.7874,260.57516 A 192.6866,164.04877 0 1 1 772.1606,260.57516 z"
1110 transform="translate(-174.03125,62.156036)" />
1111 </mask>
1112 <clipPath
1113 clipPathUnits="userSpaceOnUse"
1114 id="clipPath3622">
1115 <path
1116 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1117 d="M 266.27183,924.57186 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87514 C 271.58022,990.42748 284.75965,1019.7825 288.68797,1037.0589 C 292.61419,1054.326 291.3821,1075.3685 276.22853,1088.2071 C 260.91092,1101.1845 234.17726,1109.806 208.39623,1103.9409 C 182.61517,1098.0756 138.84716,1054.7175 119.80604,1033.7126 C 100.6939,1012.6293 56.045183,939.86194 41.867508,909.43681 C 27.689836,879.01169 29.207903,872.71824 33.747793,863.90708 C 24.381071,839.38658 21.334081,813.84027 0.035335518,788.33044 C 30.360815,791.44488 43.915625,815.28677 60.161025,835.47019 C 54.631129,787.39416 42.10631,771.05369 31.787073,744.74589 C 61.781368,750.82755 82.366433,776.61829 95.766856,817.45839 C 105.32101,813.54048 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.00481,738.48135 89.267015,707.32725 C 142.70898,712.99758 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.42601,751.28936 177.62716,712.76848 195.01526,670.9882 C 195.01526,670.9882 243.30204,736.42507 249.40492,756.79397 C 255.50779,777.16288 250.92373,795.49449 250.92373,795.49449 C 250.92373,795.49449 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82997 267.68496,905.69194 266.27183,924.57186 z"
1118 id="path3624"
1119 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
1120 </clipPath>
1121 <clipPath
1122 clipPathUnits="userSpaceOnUse"
1123 id="clipPath3636">
1124 <path
1125 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1126 d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z"
1127 id="path3638"
1128 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
1129 </clipPath>
1130 <linearGradient
1131 inkscape:collect="always"
1132 xlink:href="#linearGradient3660"
1133 id="linearGradient3666"
1134 x1="1255.7386"
1135 y1="667.09216"
1136 x2="893.69995"
1137 y2="858.01099"
1138 gradientUnits="userSpaceOnUse" />
1139 <filter
1140 inkscape:collect="always"
1141 id="filter3779"
1142 x="-0.087980822"
1143 width="1.1759616"
1144 y="-0.17728332"
1145 height="1.3545666">
1146 <feGaussianBlur
1147 inkscape:collect="always"
1148 stdDeviation="16.340344"
1149 id="feGaussianBlur3781" />
1150 </filter>
1151 <filter
1152 id="filter3785"
1153 inkscape:label="White Fur">
1154 <feTurbulence
1155 id="feTurbulence3787"
1156 in="SourceAlpha"
1157 type="fractalNoise"
1158 baseFrequency="0.24044943820224721"
1159 numOctaves="10"
1160 seed="655"
1161 result="result0" />
1162 <feDisplacementMap
1163 id="feDisplacementMap3789"
1164 in="SourceGraphic"
1165 in2="result0"
1166 scale="62"
1167 xChannelSelector="B"
1168 yChannelSelector="G" />
1169 </filter>
1170 <filter
1171 inkscape:collect="always"
1172 id="filter3677">
1173 <feGaussianBlur
1174 inkscape:collect="always"
1175 stdDeviation="2.0397518"
1176 id="feGaussianBlur3679" />
1177 </filter>
1178 <clipPath
1179 clipPathUnits="userSpaceOnUse"
1180 id="clipPath3722">
1181 <path
1182 style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1183 d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z"
1184 id="path3724"
1185 sodipodi:nodetypes="czzzzzzzzcc" />
1186 </clipPath>
1187 <clipPath
1188 clipPathUnits="userSpaceOnUse"
1189 id="clipPath3986">
1190 <path
1191 style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1192 d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z"
1193 id="path3988"
1194 sodipodi:nodetypes="czzzzzzzzcc" />
1195 </clipPath>
1196 <clipPath
1197 clipPathUnits="userSpaceOnUse"
1198 id="clipPath3992">
1199 <path
1200 style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1201 d="M 709.28572,844.50504 C 763.57143,843.07647 835.32072,829.45305 879.28572,817.71932 C 923.33843,805.96218 1005.172,781.37208 1054.6428,759.86218 C 1103.9821,738.40946 1168.2465,700.58058 1208.9286,667.71933 C 1249.4367,634.99864 1261.3185,611.89952 1269.6429,634.1479 C 1278.012,656.51569 1253.2359,690.47352 1231.7857,715.21933 C 1210.1816,740.14273 1179.0544,767.92466 1132.8571,804.50504 C 1086.6598,841.08542 976.77458,906.08967 920,933.07647 C 862.93394,960.20183 791.79666,991.31489 747.85714,1005.5765 C 703.91762,1019.8381 616.42857,1036.6479 616.42857,1036.6479 L 709.28572,844.50504 z"
1202 id="path3994"
1203 sodipodi:nodetypes="czzzzzzzzcc" />
1204 </clipPath>
1205 <clipPath
1206 clipPathUnits="userSpaceOnUse"
1207 id="clipPath3998">
1208 <path
1209 style="opacity:1;fill:#262f2f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1210 d="M 178.21428,274.14789 C 174.40985,248.88366 161.40456,223.50987 161.05748,198.62266 C 160.87122,185.26714 164.33033,172.05175 174.81301,159.06861 C 211.16003,93.772775 291.75392,74.373925 360.74767,67.603183 C 447.67006,56.586382 545.66034,85.543255 594.11901,163.00447 C 648.24303,238.73777 650.79377,335.54359 674.73105,422.53242 C 704.16884,549.66002 729.51019,678.74656 735.12329,809.38277 C 732.05981,887.56462 726.69695,974.56692 674.62008,1037.5169 C 626.59354,1087.8743 551.83361,1087.5699 487.5502,1096.5192 C 396.99481,1101.1742 303.19867,1080.3734 225.7663,1032.321 C 160.99066,994.38099 130.03611,918.84237 128.4869,846.30142 C 120.09773,766.42626 154.87842,692.49291 180.1073,619.14181 C 187.57791,536.38074 189.52016,452.89406 189.76064,369.75697 C 188.92382,337.56153 182.67111,305.93964 178.21428,274.14789 z"
1211 id="path4000"
1212 sodipodi:nodetypes="cscccccccccccc" />
1213 </clipPath>
1214 <filter
1215 inkscape:collect="always"
1216 id="filter4002"
1217 x="-0.24334238"
1218 width="1.4866848"
1219 y="-0.39104807"
1220 height="1.7820961">
1221 <feGaussianBlur
1222 inkscape:collect="always"
1223 stdDeviation="14.589518"
1224 id="feGaussianBlur4004" />
1225 </filter>
1226 <filter
1227 inkscape:collect="always"
1228 id="filter4010"
1229 x="-0.14577261"
1230 width="1.2915452"
1231 y="-0.23523259"
1232 height="1.4704652">
1233 <feGaussianBlur
1234 inkscape:collect="always"
1235 stdDeviation="4.4442907"
1236 id="feGaussianBlur4012" />
1237 </filter>
1238 <filter
1239 inkscape:collect="always"
1240 id="filter4053">
1241 <feGaussianBlur
1242 inkscape:collect="always"
1243 stdDeviation="0.6062947"
1244 id="feGaussianBlur4055" />
1245 </filter>
1246 <filter
1247 inkscape:collect="always"
1248 id="filter4079">
1249 <feGaussianBlur
1250 inkscape:collect="always"
1251 stdDeviation="6.5887624"
1252 id="feGaussianBlur4081" />
1253 </filter>
1254 <filter
1255 inkscape:collect="always"
1256 id="filter4083">
1257 <feGaussianBlur
1258 inkscape:collect="always"
1259 stdDeviation="1.5052066"
1260 id="feGaussianBlur4085" />
1261 </filter>
1262 <radialGradient
1263 inkscape:collect="always"
1264 xlink:href="#linearGradient4113"
1265 id="radialGradient4119"
1266 cx="296.33783"
1267 cy="427.17749"
1268 fx="296.33783"
1269 fy="427.17749"
1270 r="19.704132"
1271 gradientUnits="userSpaceOnUse"
1272 gradientTransform="matrix(2.9797125,0,0,2.9797125,-599.28727,-827.0855)" />
1273 <filter
1274 inkscape:collect="always"
1275 id="filter6949"
1276 x="-0.10294895"
1277 width="1.2058979"
1278 y="-0.34224695"
1279 height="1.6844939">
1280 <feGaussianBlur
1281 inkscape:collect="always"
1282 stdDeviation="1.1675612"
1283 id="feGaussianBlur6951" />
1284 </filter>
1285 <filter
1286 inkscape:collect="always"
1287 id="filter6953"
1288 x="-0.098320946"
1289 width="1.1966419"
1290 y="-0.19750816"
1291 height="1.3950163">
1292 <feGaussianBlur
1293 inkscape:collect="always"
1294 stdDeviation="1.1675612"
1295 id="feGaussianBlur6955" />
1296 </filter>
1297 <filter
1298 inkscape:collect="always"
1299 id="filter6957"
1300 x="-0.098213427"
1301 width="1.1964267"
1302 y="-0.19838208"
1303 height="1.3967642">
1304 <feGaussianBlur
1305 inkscape:collect="always"
1306 stdDeviation="1.1675612"
1307 id="feGaussianBlur6959" />
1308 </filter>
1309 <filter
1310 inkscape:collect="always"
1311 id="filter6961"
1312 x="-0.09919104"
1313 width="1.1983821"
1314 y="-0.22643611"
1315 height="1.4528722">
1316 <feGaussianBlur
1317 inkscape:collect="always"
1318 stdDeviation="1.1675612"
1319 id="feGaussianBlur6963" />
1320 </filter>
1321 <filter
1322 inkscape:collect="always"
1323 id="filter6965"
1324 x="-0.099081434"
1325 width="1.1981629"
1326 y="-0.22529824"
1327 height="1.4505965">
1328 <feGaussianBlur
1329 inkscape:collect="always"
1330 stdDeviation="1.1675612"
1331 id="feGaussianBlur6967" />
1332 </filter>
1333 <filter
1334 inkscape:collect="always"
1335 id="filter6969"
1336 x="-0.10450897"
1337 width="1.2090179"
1338 y="-0.40468886"
1339 height="1.8093777">
1340 <feGaussianBlur
1341 inkscape:collect="always"
1342 stdDeviation="1.1675612"
1343 id="feGaussianBlur6971" />
1344 </filter>
1345 <filter
1346 inkscape:collect="always"
1347 id="filter6973"
1348 x="-0.10330495"
1349 width="1.2066098"
1350 y="-0.36439717"
1351 height="1.7287945">
1352 <feGaussianBlur
1353 inkscape:collect="always"
1354 stdDeviation="1.1675612"
1355 id="feGaussianBlur6975" />
1356 </filter>
1357 <filter
1358 inkscape:collect="always"
1359 id="filter6977"
1360 x="-0.10224481"
1361 width="1.2044896"
1362 y="-0.32371372"
1363 height="1.6474274">
1364 <feGaussianBlur
1365 inkscape:collect="always"
1366 stdDeviation="1.1675612"
1367 id="feGaussianBlur6979" />
1368 </filter>
1369 <filter
1370 inkscape:collect="always"
1371 id="filter6981"
1372 x="-0.10052545"
1373 width="1.2010509"
1374 y="-0.2742162"
1375 height="1.5484324">
1376 <feGaussianBlur
1377 inkscape:collect="always"
1378 stdDeviation="1.1675612"
1379 id="feGaussianBlur6983" />
1380 </filter>
1381 <filter
1382 inkscape:collect="always"
1383 id="filter6985"
1384 x="-0.098428868"
1385 width="1.1968577"
1386 y="-0.20853186"
1387 height="1.4170637">
1388 <feGaussianBlur
1389 inkscape:collect="always"
1390 stdDeviation="1.1675612"
1391 id="feGaussianBlur6987" />
1392 </filter>
1393 <filter
1394 inkscape:collect="always"
1395 id="filter6989"
1396 x="-0.098428868"
1397 width="1.1968577"
1398 y="-0.20287035"
1399 height="1.4057407">
1400 <feGaussianBlur
1401 inkscape:collect="always"
1402 stdDeviation="1.1675612"
1403 id="feGaussianBlur6991" />
1404 </filter>
1405 <filter
1406 inkscape:collect="always"
1407 id="filter6993"
1408 x="-0.098213255"
1409 width="1.1964265"
1410 y="-0.19838208"
1411 height="1.3967642">
1412 <feGaussianBlur
1413 inkscape:collect="always"
1414 stdDeviation="1.1675612"
1415 id="feGaussianBlur6995" />
1416 </filter>
1417 <filter
1418 inkscape:collect="always"
1419 id="filter6997">
1420 <feGaussianBlur
1421 inkscape:collect="always"
1422 stdDeviation="1.1675612"
1423 id="feGaussianBlur6999" />
1424 </filter>
1425 <filter
1426 inkscape:collect="always"
1427 id="filter7001">
1428 <feGaussianBlur
1429 inkscape:collect="always"
1430 stdDeviation="1.1675612"
1431 id="feGaussianBlur7003" />
1432 </filter>
1433 <filter
1434 inkscape:collect="always"
1435 id="filter7285"
1436 x="-0.030884685"
1437 width="1.0617694"
1438 y="-0.10267408"
1439 height="1.2053483">
1440 <feGaussianBlur
1441 inkscape:collect="always"
1442 stdDeviation="0.35026836"
1443 id="feGaussianBlur7287" />
1444 </filter>
1445 <filter
1446 inkscape:collect="always"
1447 id="filter7289">
1448 <feGaussianBlur
1449 inkscape:collect="always"
1450 stdDeviation="0.35026836"
1451 id="feGaussianBlur7291" />
1452 </filter>
1453 <filter
1454 inkscape:collect="always"
1455 id="filter7293">
1456 <feGaussianBlur
1457 inkscape:collect="always"
1458 stdDeviation="0.35026836"
1459 id="feGaussianBlur7295" />
1460 </filter>
1461 <filter
1462 inkscape:collect="always"
1463 id="filter7297">
1464 <feGaussianBlur
1465 inkscape:collect="always"
1466 stdDeviation="0.35026836"
1467 id="feGaussianBlur7299" />
1468 </filter>
1469 <filter
1470 inkscape:collect="always"
1471 id="filter7301">
1472 <feGaussianBlur
1473 inkscape:collect="always"
1474 stdDeviation="0.35026836"
1475 id="feGaussianBlur7303" />
1476 </filter>
1477 <filter
1478 inkscape:collect="always"
1479 id="filter7305">
1480 <feGaussianBlur
1481 inkscape:collect="always"
1482 stdDeviation="0.35026836"
1483 id="feGaussianBlur7307" />
1484 </filter>
1485 <filter
1486 inkscape:collect="always"
1487 id="filter7309">
1488 <feGaussianBlur
1489 inkscape:collect="always"
1490 stdDeviation="0.35026836"
1491 id="feGaussianBlur7311" />
1492 </filter>
1493 <filter
1494 inkscape:collect="always"
1495 id="filter7313">
1496 <feGaussianBlur
1497 inkscape:collect="always"
1498 stdDeviation="0.35026836"
1499 id="feGaussianBlur7315" />
1500 </filter>
1501 <filter
1502 inkscape:collect="always"
1503 id="filter7317">
1504 <feGaussianBlur
1505 inkscape:collect="always"
1506 stdDeviation="0.35026836"
1507 id="feGaussianBlur7319" />
1508 </filter>
1509 <filter
1510 inkscape:collect="always"
1511 id="filter7321">
1512 <feGaussianBlur
1513 inkscape:collect="always"
1514 stdDeviation="0.35026836"
1515 id="feGaussianBlur7323" />
1516 </filter>
1517 <filter
1518 inkscape:collect="always"
1519 id="filter7325"
1520 x="-0.031352691"
1521 width="1.0627054"
1522 y="-0.12140666"
1523 height="1.2428133">
1524 <feGaussianBlur
1525 inkscape:collect="always"
1526 stdDeviation="0.35026836"
1527 id="feGaussianBlur7327" />
1528 </filter>
1529 <filter
1530 inkscape:collect="always"
1531 id="filter7329"
1532 x="-0.030991485"
1533 width="1.061983"
1534 y="-0.10931916"
1535 height="1.2186383">
1536 <feGaussianBlur
1537 inkscape:collect="always"
1538 stdDeviation="0.35026836"
1539 id="feGaussianBlur7331" />
1540 </filter>
1541 <filter
1542 inkscape:collect="always"
1543 id="filter7333">
1544 <feGaussianBlur
1545 inkscape:collect="always"
1546 stdDeviation="0.35026836"
1547 id="feGaussianBlur7335" />
1548 </filter>
1549 <filter
1550 inkscape:collect="always"
1551 id="filter7337">
1552 <feGaussianBlur
1553 inkscape:collect="always"
1554 stdDeviation="0.35026836"
1555 id="feGaussianBlur7339" />
1556 </filter>
1557 <filter
1558 inkscape:collect="always"
1559 id="filter7345">
1560 <feGaussianBlur
1561 inkscape:collect="always"
1562 stdDeviation="1.7233839"
1563 id="feGaussianBlur7347" />
1564 </filter>
1565 <clipPath
1566 clipPathUnits="userSpaceOnUse"
1567 id="clipPath7421">
1568 <path
1569 sodipodi:type="inkscape:offset"
1570 inkscape:radius="0"
1571 inkscape:original="M 1111.4062 -285.9375 L 1107.4688 -284.0625 C 1107.4283 -284.05228 1107.3692 -284.04201 1107.3438 -284.03125 C 1106.925 -283.8184 1107.1791 -283.93067 1106.6875 -283.71875 C 1106.2014 -283.50919 1104.9499 -283.13456 1102.5938 -282.25 C 1099.2626 -280.99942 1096.7895 -280.10016 1095.5938 -279.1875 C 1094.0576 -279.16623 1091.8733 -278.95419 1089.9375 -278.46875 C 1086.956 -277.72108 1085.0823 -277.29474 1083.1875 -276.875 C 1081.2927 -276.45527 1081.512 -276.23281 1080.3125 -276 C 1079.0159 -275.74833 1078.5911 -276.00899 1074.875 -275.21875 C 1071.3851 -274.4766 1065.9802 -273.28768 1064.7188 -272.53125 C 1063.1348 -272.71203 1060.8513 -272.85303 1058.875 -272.5625 C 1055.8346 -272.11554 1053.9588 -271.88974 1052.0312 -271.65625 C 1051.3758 -271.57687 1050.9902 -271.45547 1050.6875 -271.375 C 1050.2613 -271.24334 1050.0017 -271.11498 1049.3125 -271.03125 C 1048.0009 -270.87188 1047.5503 -271.18808 1043.7812 -270.75 C 1040.2273 -270.33691 1034.7758 -269.47718 1033.5312 -268.8125 C 1031.9322 -269.10979 1029.6735 -269.34669 1027.6875 -269.15625 C 1024.6287 -268.86293 1022.7155 -268.67226 1020.7812 -268.5 C 1018.847 -268.32773 1019.0926 -268.07763 1017.875 -267.96875 C 1016.5588 -267.85105 1016.1152 -268.13238 1012.3438 -267.71875 C 1008.8017 -267.3303 1003.3359 -266.50948 1002.0625 -265.84375 C 1000.4636 -266.13844 998.1753 -266.35076 996.1875 -266.15625 C 993.12921 -265.857 991.2463 -265.67601 989.3125 -265.5 C 988.65501 -265.44015 988.27245 -265.32144 987.96875 -265.25 C 987.54105 -265.13104 987.28525 -265.03193 986.59375 -264.96875 C 985.27775 -264.84849 984.834 -265.16363 981.0625 -264.75 C 977.50631 -264.35998 972.0569 -263.51084 970.8125 -262.84375 C 969.21381 -263.13793 966.95265 -263.36747 964.96875 -263.15625 C 961.91305 -262.83092 959.9947 -262.63001 958.0625 -262.4375 C 956.13031 -262.24499 956.37275 -261.99662 955.15625 -261.875 C 953.84137 -261.74353 953.3932 -262.03954 949.625 -261.59375 C 946.08611 -261.17509 940.6473 -260.30158 939.375 -259.625 C 937.77741 -259.90604 935.51505 -260.04543 933.53125 -259.8125 C 930.47927 -259.45413 928.58625 -259.24464 926.65625 -259.03125 C 926.00007 -258.95869 925.6156 -258.85856 925.3125 -258.78125 C 924.88571 -258.65402 924.6276 -258.51405 923.9375 -258.4375 C 922.62411 -258.29181 922.17015 -258.61152 918.40625 -258.125 C 914.85737 -257.66624 909.4276 -256.70598 908.1875 -256 C 906.59441 -256.24424 904.3537 -256.38135 902.375 -256.125 C 899.32741 -255.73018 897.4243 -255.47655 895.5 -255.21875 C 893.57571 -254.96096 893.7739 -254.72522 892.5625 -254.5625 C 891.25301 -254.3866 890.8153 -254.66688 887.0625 -254.09375 C 883.53821 -253.55551 878.1393 -252.39458 876.875 -251.65625 C 875.28751 -251.85979 873.0295 -251.91098 871.0625 -251.5625 C 868.03631 -251.02638 866.1636 -250.70081 864.25 -250.375 C 863.59941 -250.26423 863.2363 -250.10406 862.9375 -250 C 862.51681 -249.83512 862.27405 -249.6687 861.59375 -249.53125 C 860.29905 -249.26966 859.86665 -249.53745 856.15625 -248.71875 C 852.65777 -247.9468 847.31035 -246.33582 846.09375 -245.5 C 844.53085 -245.57745 842.33625 -245.41472 840.40625 -244.90625 C 837.43387 -244.12312 835.58855 -243.67416 833.71875 -243.15625 C 831.84875 -242.63835 832.0521 -242.38897 830.875 -242.0625 C 829.60251 -241.7096 829.17795 -241.95541 825.53125 -240.875 C 822.10657 -239.86037 816.88185 -237.94183 815.65625 -237.03125 C 814.11747 -237.01851 811.93645 -236.75903 810.03125 -236.15625 C 807.10027 -235.22891 805.2809 -234.69783 803.4375 -234.09375 C 802.81071 -233.88837 802.44585 -233.70117 802.15625 -233.5625 C 801.74867 -233.34889 801.50295 -233.15375 800.84375 -232.9375 C 799.58925 -232.52596 799.1576 -232.74846 795.5625 -231.5 C 792.17261 -230.32283 786.96755 -228.2863 785.78125 -227.34375 C 784.25737 -227.28408 782.1312 -226.94888 780.25 -226.28125 C 777.35261 -225.25296 775.55095 -224.60577 773.71875 -223.96875 C 771.88655 -223.33174 772.0909 -223.12021 770.9375 -222.71875 C 769.69071 -222.28479 769.27395 -222.51903 765.71875 -221.15625 C 762.38005 -219.87645 757.23165 -217.6737 756.03125 -216.6875 C 754.52407 -216.57981 752.39555 -216.1887 750.53125 -215.46875 C 747.66307 -214.36115 745.90735 -213.68719 744.09375 -213 C 743.47705 -212.76637 743.0973 -212.55797 742.8125 -212.40625 C 742.81251 -212.40625 742.8125 -212.37673 742.8125 -212.375 L 734.8125 -209.1875 L 736.625 -194.46875 C 736.36701 -194.52956 742.8125 -191.15625 742.8125 -191.15625 C 743.03891 -191.30093 743.26145 -191.42886 743.53125 -191.53125 C 744.61177 -191.94123 745.70285 -191.74702 749.53125 -193.21875 C 753.35977 -194.69049 754.7553 -195.22373 755.4375 -195.625 C 756.11711 -196.02478 757.04925 -196.50437 757.65625 -197.15625 C 759.48317 -197.294 761.22705 -197.64948 762.59375 -198.15625 C 765.56175 -199.25677 767.4691 -199.96244 769.375 -200.625 C 771.28081 -201.28754 771.72915 -202.03987 772.78125 -202.40625 C 773.87287 -202.78636 774.97635 -202.57163 778.84375 -203.9375 C 782.71115 -205.30336 784.1269 -205.76458 784.8125 -206.15625 C 785.51361 -206.55677 786.5133 -207.08923 787.125 -207.75 C 789.09581 -207.80466 790.94195 -208.13463 792.40625 -208.625 C 795.40777 -209.63008 797.3324 -210.24671 799.25 -210.875 C 800.78861 -211.3791 801.42415 -211.92177 802.15625 -212.3125 C 802.38647 -212.44681 802.63215 -212.56623 802.90625 -212.65625 C 804.00457 -213.01673 805.0877 -212.73762 809 -213.96875 C 812.91231 -215.19988 814.366 -215.6417 815.0625 -216 C 815.75641 -216.35697 816.6926 -216.79261 817.3125 -217.40625 C 819.17771 -217.42891 820.94835 -217.67308 822.34375 -218.09375 C 825.37415 -219.00729 827.33615 -219.52385 829.28125 -220.0625 C 831.22637 -220.60114 831.70745 -221.32702 832.78125 -221.625 C 833.89527 -221.93415 835.00125 -221.61761 838.96875 -222.65625 C 842.93625 -223.69488 844.38625 -224.08898 845.09375 -224.40625 C 845.82855 -224.73584 846.90765 -225.15997 847.53125 -225.78125 C 849.52907 -225.66525 851.3887 -225.80134 852.875 -226.15625 C 855.95311 -226.89125 857.9584 -227.25719 859.9375 -227.65625 C 861.52541 -227.97643 862.1818 -228.4468 862.9375 -228.75 C 863.17501 -228.8568 863.4044 -228.94276 863.6875 -229 C 864.82091 -229.22919 865.99215 -228.79107 870.03125 -229.5 C 874.07067 -230.20893 875.5315 -230.42709 876.25 -230.6875 C 876.96581 -230.94694 877.95435 -231.25474 878.59375 -231.78125 C 880.51795 -231.54176 882.34165 -231.55672 883.78125 -231.78125 C 886.90767 -232.26887 888.9358 -232.48192 890.9375 -232.75 C 892.93921 -233.01807 893.42625 -233.69514 894.53125 -233.84375 C 895.67767 -233.99793 896.8071 -233.54218 900.875 -234.0625 C 904.94281 -234.58282 906.43525 -234.75823 907.15625 -235 C 907.89337 -235.24714 908.95435 -235.58623 909.59375 -236.125 C 911.64375 -235.78947 913.56745 -235.72704 915.09375 -235.90625 C 918.23595 -236.27521 920.27375 -236.46561 922.28125 -236.6875 C 923.89207 -236.86552 924.5459 -237.2957 925.3125 -237.53125 C 925.55341 -237.61677 925.80655 -237.68685 926.09375 -237.71875 C 927.24345 -237.84647 928.39505 -237.3721 932.46875 -237.84375 C 936.54245 -238.3154 938.0278 -238.45435 938.75 -238.6875 C 939.46941 -238.91977 940.45025 -239.16096 941.09375 -239.65625 C 943.03005 -239.32279 944.8638 -239.25201 946.3125 -239.40625 C 949.45851 -239.7412 951.49 -239.92484 953.5 -240.125 C 955.50991 -240.32514 955.98415 -240.95139 957.09375 -241.0625 C 958.24485 -241.17778 959.39025 -240.69744 963.46875 -241.125 C 967.54725 -241.55256 969.05765 -241.68709 969.78125 -241.90625 C 970.52047 -242.13011 971.57685 -242.4195 972.21875 -242.9375 C 974.27575 -242.53883 976.2206 -242.4441 977.75 -242.59375 C 980.89871 -242.90185 982.9258 -243.067 984.9375 -243.25 C 986.55151 -243.39682 987.20055 -243.81055 987.96875 -244.03125 C 988.21005 -244.11211 988.4623 -244.16116 988.75 -244.1875 C 989.90211 -244.29295 991.0429 -243.79475 995.125 -244.1875 C 999.20711 -244.58025 1000.7139 -244.71834 1001.4375 -244.9375 C 1002.1584 -245.15583 1003.1371 -245.3852 1003.7812 -245.875 C 1005.7193 -245.52501 1007.5501 -245.42062 1009 -245.5625 C 1012.1487 -245.8706 1014.1758 -246.03575 1016.1875 -246.21875 C 1018.1991 -246.40174 1018.7017 -247.05677 1019.8125 -247.15625 C 1020.9648 -247.25948 1022.1047 -246.77142 1026.1875 -247.15625 C 1030.2704 -247.54107 1031.7762 -247.65725 1032.5 -247.875 C 1033.2393 -248.09743 1034.2956 -248.38949 1034.9375 -248.90625 C 1036.9949 -248.50448 1038.9404 -248.40292 1040.4688 -248.5625 C 1043.6153 -248.89102 1045.6458 -249.0852 1047.6562 -249.28125 C 1049.2692 -249.43854 1049.9219 -249.91273 1050.6875 -250.15625 C 1050.9282 -250.24429 1051.1507 -250.27762 1051.4375 -250.3125 C 1052.5858 -250.4522 1053.7542 -249.97259 1057.8125 -250.5625 C 1061.8708 -251.15242 1063.3743 -251.33964 1064.0938 -251.59375 C 1064.8104 -251.84691 1065.7684 -252.15182 1066.4062 -252.6875 C 1068.3259 -252.47556 1070.1262 -252.53609 1071.5625 -252.78125 C 1074.6816 -253.31365 1076.6741 -253.70986 1078.6562 -254.09375 C 1080.6383 -254.47762 1081.1305 -255.1334 1082.2188 -255.375 C 1083.3475 -255.62566 1084.489 -255.25871 1088.4688 -256.25 C 1092.4483 -257.24127 1093.8983 -257.6693 1094.5938 -258.03125 C 1095.316 -258.40725 1096.3555 -258.90183 1096.9688 -259.5625 C 1098.9317 -259.57454 1100.7625 -259.85355 1102.1875 -260.40625 C 1105.1387 -261.55085 1107.0607 -262.27567 1108.875 -263.15625 C 1110.3307 -263.86277 1111.1941 -264.85828 1111.4062 -265.15625 C 1111.6185 -265.4542 1111.5051 -265.8848 1111.5312 -265.90625 C 1111.5742 -265.94148 1111.8716 -266.00028 1112.0312 -266.34375 C 1112.8902 -268.19082 1114.3544 -271.97139 1114.4688 -272.65625 C 1114.5825 -273.33839 1114.6368 -274.00902 1114.6875 -274.40625 C 1114.7169 -274.63575 1114.5404 -275.28515 1114.5625 -275.34375 C 1114.5934 -275.42579 1114.8508 -275.59432 1114.9062 -275.84375 C 1115.1725 -277.04206 1114.9953 -278.05111 1114.7812 -279.46875 C 1114.5673 -280.88638 1113.8096 -284.08338 1113.1562 -284.9375 C 1112.4973 -285.79922 1111.9314 -285.94801 1111.4062 -285.9375 z "
1572 style="fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1573 id="path7423"
1574 d="M 1111.4062,-285.9375 L 1107.4688,-284.0625 C 1107.4283,-284.05228 1107.3692,-284.04201 1107.3438,-284.03125 C 1106.925,-283.8184 1107.1791,-283.93067 1106.6875,-283.71875 C 1106.2014,-283.50919 1104.9499,-283.13456 1102.5938,-282.25 C 1099.2626,-280.99942 1096.7895,-280.10016 1095.5938,-279.1875 C 1094.0576,-279.16623 1091.8733,-278.95419 1089.9375,-278.46875 C 1086.956,-277.72108 1085.0823,-277.29474 1083.1875,-276.875 C 1081.2927,-276.45527 1081.512,-276.23281 1080.3125,-276 C 1079.0159,-275.74833 1078.5911,-276.00899 1074.875,-275.21875 C 1071.3851,-274.4766 1065.9802,-273.28768 1064.7188,-272.53125 C 1063.1348,-272.71203 1060.8513,-272.85303 1058.875,-272.5625 C 1055.8346,-272.11554 1053.9588,-271.88974 1052.0312,-271.65625 C 1051.3758,-271.57687 1050.9902,-271.45547 1050.6875,-271.375 C 1050.2613,-271.24334 1050.0017,-271.11498 1049.3125,-271.03125 C 1048.0009,-270.87188 1047.5503,-271.18808 1043.7812,-270.75 C 1040.2273,-270.33691 1034.7758,-269.47718 1033.5312,-268.8125 C 1031.9322,-269.10979 1029.6735,-269.34669 1027.6875,-269.15625 C 1024.6287,-268.86293 1022.7155,-268.67226 1020.7812,-268.5 C 1018.847,-268.32773 1019.0926,-268.07763 1017.875,-267.96875 C 1016.5588,-267.85105 1016.1152,-268.13238 1012.3438,-267.71875 C 1008.8017,-267.3303 1003.3359,-266.50948 1002.0625,-265.84375 C 1000.4636,-266.13844 998.1753,-266.35076 996.1875,-266.15625 C 993.12921,-265.857 991.2463,-265.67601 989.3125,-265.5 C 988.65501,-265.44015 988.27245,-265.32144 987.96875,-265.25 C 987.54105,-265.13104 987.28525,-265.03193 986.59375,-264.96875 C 985.27775,-264.84849 984.834,-265.16363 981.0625,-264.75 C 977.50631,-264.35998 972.0569,-263.51084 970.8125,-262.84375 C 969.21381,-263.13793 966.95265,-263.36747 964.96875,-263.15625 C 961.91305,-262.83092 959.9947,-262.63001 958.0625,-262.4375 C 956.13031,-262.24499 956.37275,-261.99662 955.15625,-261.875 C 953.84137,-261.74353 953.3932,-262.03954 949.625,-261.59375 C 946.08611,-261.17509 940.6473,-260.30158 939.375,-259.625 C 937.77741,-259.90604 935.51505,-260.04543 933.53125,-259.8125 C 930.47927,-259.45413 928.58625,-259.24464 926.65625,-259.03125 C 926.00007,-258.95869 925.6156,-258.85856 925.3125,-258.78125 C 924.88571,-258.65402 924.6276,-258.51405 923.9375,-258.4375 C 922.62411,-258.29181 922.17015,-258.61152 918.40625,-258.125 C 914.85737,-257.66624 909.4276,-256.70598 908.1875,-256 C 906.59441,-256.24424 904.3537,-256.38135 902.375,-256.125 C 899.32741,-255.73018 897.4243,-255.47655 895.5,-255.21875 C 893.57571,-254.96096 893.7739,-254.72522 892.5625,-254.5625 C 891.25301,-254.3866 890.8153,-254.66688 887.0625,-254.09375 C 883.53821,-253.55551 878.1393,-252.39458 876.875,-251.65625 C 875.28751,-251.85979 873.0295,-251.91098 871.0625,-251.5625 C 868.03631,-251.02638 866.1636,-250.70081 864.25,-250.375 C 863.59941,-250.26423 863.2363,-250.10406 862.9375,-250 C 862.51681,-249.83512 862.27405,-249.6687 861.59375,-249.53125 C 860.29905,-249.26966 859.86665,-249.53745 856.15625,-248.71875 C 852.65777,-247.9468 847.31035,-246.33582 846.09375,-245.5 C 844.53085,-245.57745 842.33625,-245.41472 840.40625,-244.90625 C 837.43387,-244.12312 835.58855,-243.67416 833.71875,-243.15625 C 831.84875,-242.63835 832.0521,-242.38897 830.875,-242.0625 C 829.60251,-241.7096 829.17795,-241.95541 825.53125,-240.875 C 822.10657,-239.86037 816.88185,-237.94183 815.65625,-237.03125 C 814.11747,-237.01851 811.93645,-236.75903 810.03125,-236.15625 C 807.10027,-235.22891 805.2809,-234.69783 803.4375,-234.09375 C 802.81071,-233.88837 802.44585,-233.70117 802.15625,-233.5625 C 801.74867,-233.34889 801.50295,-233.15375 800.84375,-232.9375 C 799.58925,-232.52596 799.1576,-232.74846 795.5625,-231.5 C 792.17261,-230.32283 786.96755,-228.2863 785.78125,-227.34375 C 784.25737,-227.28408 782.1312,-226.94888 780.25,-226.28125 C 777.35261,-225.25296 775.55095,-224.60577 773.71875,-223.96875 C 771.88655,-223.33174 772.0909,-223.12021 770.9375,-222.71875 C 769.69071,-222.28479 769.27395,-222.51903 765.71875,-221.15625 C 762.38005,-219.87645 757.23165,-217.6737 756.03125,-216.6875 C 754.52407,-216.57981 752.39555,-216.1887 750.53125,-215.46875 C 747.66307,-214.36115 745.90735,-213.68719 744.09375,-213 C 743.47705,-212.76637 743.0973,-212.55797 742.8125,-212.40625 C 742.81251,-212.40625 742.8125,-212.37673 742.8125,-212.375 L 734.8125,-209.1875 L 736.625,-194.46875 C 736.36701,-194.52956 742.8125,-191.15625 742.8125,-191.15625 C 743.03891,-191.30093 743.26145,-191.42886 743.53125,-191.53125 C 744.61177,-191.94123 745.70285,-191.74702 749.53125,-193.21875 C 753.35977,-194.69049 754.7553,-195.22373 755.4375,-195.625 C 756.11711,-196.02478 757.04925,-196.50437 757.65625,-197.15625 C 759.48317,-197.294 761.22705,-197.64948 762.59375,-198.15625 C 765.56175,-199.25677 767.4691,-199.96244 769.375,-200.625 C 771.28081,-201.28754 771.72915,-202.03987 772.78125,-202.40625 C 773.87287,-202.78636 774.97635,-202.57163 778.84375,-203.9375 C 782.71115,-205.30336 784.1269,-205.76458 784.8125,-206.15625 C 785.51361,-206.55677 786.5133,-207.08923 787.125,-207.75 C 789.09581,-207.80466 790.94195,-208.13463 792.40625,-208.625 C 795.40777,-209.63008 797.3324,-210.24671 799.25,-210.875 C 800.78861,-211.3791 801.42415,-211.92177 802.15625,-212.3125 C 802.38647,-212.44681 802.63215,-212.56623 802.90625,-212.65625 C 804.00457,-213.01673 805.0877,-212.73762 809,-213.96875 C 812.91231,-215.19988 814.366,-215.6417 815.0625,-216 C 815.75641,-216.35697 816.6926,-216.79261 817.3125,-217.40625 C 819.17771,-217.42891 820.94835,-217.67308 822.34375,-218.09375 C 825.37415,-219.00729 827.33615,-219.52385 829.28125,-220.0625 C 831.22637,-220.60114 831.70745,-221.32702 832.78125,-221.625 C 833.89527,-221.93415 835.00125,-221.61761 838.96875,-222.65625 C 842.93625,-223.69488 844.38625,-224.08898 845.09375,-224.40625 C 845.82855,-224.73584 846.90765,-225.15997 847.53125,-225.78125 C 849.52907,-225.66525 851.3887,-225.80134 852.875,-226.15625 C 855.95311,-226.89125 857.9584,-227.25719 859.9375,-227.65625 C 861.52541,-227.97643 862.1818,-228.4468 862.9375,-228.75 C 863.17501,-228.8568 863.4044,-228.94276 863.6875,-229 C 864.82091,-229.22919 865.99215,-228.79107 870.03125,-229.5 C 874.07067,-230.20893 875.5315,-230.42709 876.25,-230.6875 C 876.96581,-230.94694 877.95435,-231.25474 878.59375,-231.78125 C 880.51795,-231.54176 882.34165,-231.55672 883.78125,-231.78125 C 886.90767,-232.26887 888.9358,-232.48192 890.9375,-232.75 C 892.93921,-233.01807 893.42625,-233.69514 894.53125,-233.84375 C 895.67767,-233.99793 896.8071,-233.54218 900.875,-234.0625 C 904.94281,-234.58282 906.43525,-234.75823 907.15625,-235 C 907.89337,-235.24714 908.95435,-235.58623 909.59375,-236.125 C 911.64375,-235.78947 913.56745,-235.72704 915.09375,-235.90625 C 918.23595,-236.27521 920.27375,-236.46561 922.28125,-236.6875 C 923.89207,-236.86552 924.5459,-237.2957 925.3125,-237.53125 C 925.55341,-237.61677 925.80655,-237.68685 926.09375,-237.71875 C 927.24345,-237.84647 928.39505,-237.3721 932.46875,-237.84375 C 936.54245,-238.3154 938.0278,-238.45435 938.75,-238.6875 C 939.46941,-238.91977 940.45025,-239.16096 941.09375,-239.65625 C 943.03005,-239.32279 944.8638,-239.25201 946.3125,-239.40625 C 949.45851,-239.7412 951.49,-239.92484 953.5,-240.125 C 955.50991,-240.32514 955.98415,-240.95139 957.09375,-241.0625 C 958.24485,-241.17778 959.39025,-240.69744 963.46875,-241.125 C 967.54725,-241.55256 969.05765,-241.68709 969.78125,-241.90625 C 970.52047,-242.13011 971.57685,-242.4195 972.21875,-242.9375 C 974.27575,-242.53883 976.2206,-242.4441 977.75,-242.59375 C 980.89871,-242.90185 982.9258,-243.067 984.9375,-243.25 C 986.55151,-243.39682 987.20055,-243.81055 987.96875,-244.03125 C 988.21005,-244.11211 988.4623,-244.16116 988.75,-244.1875 C 989.90211,-244.29295 991.0429,-243.79475 995.125,-244.1875 C 999.20711,-244.58025 1000.7139,-244.71834 1001.4375,-244.9375 C 1002.1584,-245.15583 1003.1371,-245.3852 1003.7812,-245.875 C 1005.7193,-245.52501 1007.5501,-245.42062 1009,-245.5625 C 1012.1487,-245.8706 1014.1758,-246.03575 1016.1875,-246.21875 C 1018.1991,-246.40174 1018.7017,-247.05677 1019.8125,-247.15625 C 1020.9648,-247.25948 1022.1047,-246.77142 1026.1875,-247.15625 C 1030.2704,-247.54107 1031.7762,-247.65725 1032.5,-247.875 C 1033.2393,-248.09743 1034.2956,-248.38949 1034.9375,-248.90625 C 1036.9949,-248.50448 1038.9404,-248.40292 1040.4688,-248.5625 C 1043.6153,-248.89102 1045.6458,-249.0852 1047.6562,-249.28125 C 1049.2692,-249.43854 1049.9219,-249.91273 1050.6875,-250.15625 C 1050.9282,-250.24429 1051.1507,-250.27762 1051.4375,-250.3125 C 1052.5858,-250.4522 1053.7542,-249.97259 1057.8125,-250.5625 C 1061.8708,-251.15242 1063.3743,-251.33964 1064.0938,-251.59375 C 1064.8104,-251.84691 1065.7684,-252.15182 1066.4062,-252.6875 C 1068.3259,-252.47556 1070.1262,-252.53609 1071.5625,-252.78125 C 1074.6816,-253.31365 1076.6741,-253.70986 1078.6562,-254.09375 C 1080.6383,-254.47762 1081.1305,-255.1334 1082.2188,-255.375 C 1083.3475,-255.62566 1084.489,-255.25871 1088.4688,-256.25 C 1092.4483,-257.24127 1093.8983,-257.6693 1094.5938,-258.03125 C 1095.316,-258.40725 1096.3555,-258.90183 1096.9688,-259.5625 C 1098.9317,-259.57454 1100.7625,-259.85355 1102.1875,-260.40625 C 1105.1387,-261.55085 1107.0607,-262.27567 1108.875,-263.15625 C 1110.3307,-263.86277 1111.1941,-264.85828 1111.4062,-265.15625 C 1111.6185,-265.4542 1111.5051,-265.8848 1111.5312,-265.90625 C 1111.5742,-265.94148 1111.8716,-266.00028 1112.0312,-266.34375 C 1112.8902,-268.19082 1114.3544,-271.97139 1114.4688,-272.65625 C 1114.5825,-273.33839 1114.6368,-274.00902 1114.6875,-274.40625 C 1114.7169,-274.63575 1114.5404,-275.28515 1114.5625,-275.34375 C 1114.5934,-275.42579 1114.8508,-275.59432 1114.9062,-275.84375 C 1115.1725,-277.04206 1114.9953,-278.05111 1114.7812,-279.46875 C 1114.5673,-280.88638 1113.8096,-284.08338 1113.1562,-284.9375 C 1112.4973,-285.79922 1111.9314,-285.94801 1111.4062,-285.9375 z"
1575 transform="translate(8.0045714e-2,-3.125e-2)" />
1576 </clipPath>
1577 <filter
1578 inkscape:collect="always"
1579 id="filter7578"
1580 x="-0.08160872"
1581 width="1.1632174"
1582 y="-0.22659944"
1583 height="1.4531989">
1584 <feGaussianBlur
1585 inkscape:collect="always"
1586 stdDeviation="2.437399"
1587 id="feGaussianBlur7580" />
1588 </filter>
1589 <filter
1590 inkscape:collect="always"
1591 id="filter7594"
1592 x="-0.040804356"
1593 width="1.0816087"
1594 y="-0.11329972"
1595 height="1.2265995">
1596 <feGaussianBlur
1597 inkscape:collect="always"
1598 stdDeviation="1.2186995"
1599 id="feGaussianBlur7596" />
1600 </filter>
1601 <clipPath
1602 clipPathUnits="userSpaceOnUse"
1603 id="clipPath7606">
1604 <path
1605 id="path7608"
1606 d="M 1049.205,-282.26672 L 1049.1152,-282.25891 C 1047.7278,-281.37446 1042.5119,-280.65171 1042.4862,-272.73547 C 1042.462,-265.31022 1057.4991,-255.64401 1059.6425,-254.64172 C 1061.3727,-253.83263 1063.2341,-253.23296 1065.0488,-252.92297 L 1066.4862,-252.70422 C 1068.4059,-252.49228 1070.2062,-252.55281 1071.6425,-252.79797 C 1074.7616,-253.33037 1076.7541,-253.72658 1078.7362,-254.11047 C 1080.7183,-254.49434 1081.2105,-255.15012 1082.2988,-255.39172 C 1083.4275,-255.64238 1084.569,-255.27543 1088.5488,-256.26672 C 1092.5283,-257.258 1093.9782,-257.68602 1094.6738,-258.04797 C 1095.396,-258.42398 1096.4355,-258.91855 1097.0488,-259.57922 C 1099.0117,-259.59127 1100.8425,-259.87027 1102.2675,-260.42297 C 1105.2187,-261.56758 1107.1407,-262.29239 1108.955,-263.17297 C 1110.4107,-263.8795 1111.2741,-264.875 1111.4862,-265.17297 C 1111.6985,-265.47093 1111.5852,-265.90152 1111.6112,-265.92297 C 1111.6542,-265.95821 1111.9517,-266.017 1112.1112,-266.36047 C 1112.9702,-268.20755 1114.4344,-271.98811 1114.5488,-272.67297 C 1114.6625,-273.35512 1114.7168,-274.02574 1114.7675,-274.42297 C 1114.7969,-274.65248 1114.6204,-275.30187 1114.6425,-275.36047 C 1114.6734,-275.44252 1114.9308,-275.61104 1114.9862,-275.86047 C 1115.2525,-277.05879 1115.0754,-278.06783 1114.8612,-279.48547 C 1114.6473,-280.90311 1113.8896,-284.1001 1113.2362,-284.95422 C 1112.8168,-285.50279 1112.4369,-285.74672 1112.08,-285.86047 C 1112.0129,-285.87776 1111.9561,-285.90721 1111.8925,-285.92297 C 1111.8715,-285.92695 1111.8508,-285.91983 1111.83,-285.92297 C 1111.5184,-285.99847 1111.2215,-286.08164 1110.6738,-286.14172 C 1109.6883,-286.24984 1108.2491,-286.40112 1106.705,-286.39172 C 1106.1903,-286.38859 1105.6679,-286.34408 1105.1425,-286.29797 C 1101.5836,-285.98569 1096.1327,-285.30689 1094.9238,-284.67297 C 1093.2907,-285.00699 1090.9756,-285.2852 1088.9862,-285.14172 C 1085.9222,-284.92075 1084.0185,-284.79953 1082.08,-284.67297 C 1080.1416,-284.54642 1080.3939,-284.28433 1079.1738,-284.20422 C 1077.8547,-284.11762 1077.3869,-284.42747 1073.6112,-284.11047 C 1070.0655,-283.81275 1064.6306,-283.1173 1063.3925,-282.48547 C 1061.7591,-282.81998 1059.4466,-283.09786 1057.455,-282.95422 C 1054.3908,-282.73324 1052.4872,-282.58078 1050.5488,-282.45422 C 1049.8896,-282.41119 1049.5064,-282.33029 1049.205,-282.26672 z"
1607 style="opacity:0.82448976;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1608 </clipPath>
1609 <filter
1610 inkscape:collect="always"
1611 id="filter7610"
1612 x="-0.021942979"
1613 width="1.0438859"
1614 y="-0.10017137"
1615 height="1.2003427">
1616 <feGaussianBlur
1617 inkscape:collect="always"
1618 stdDeviation="0.57530213"
1619 id="feGaussianBlur7612" />
1620 </filter>
1621 <clipPath
1622 clipPathUnits="userSpaceOnUse"
1623 id="clipPath7616">
1624 <path
1625 id="path7618"
1626 d="M 205.47016,-408.97318 L 205.38003,-408.97164 C 203.9344,-408.18598 198.68082,-407.82829 198.10378,-399.93307 C 197.56244,-392.52754 211.88973,-381.83741 213.95811,-380.68826 C 215.62775,-379.76062 217.44286,-379.03275 219.23156,-378.59711 L 220.65023,-378.27877 C 222.5505,-377.93363 224.35065,-377.86862 225.80054,-378.01314 C 228.94914,-378.32698 230.9644,-378.58345 232.96843,-378.82834 C 234.97245,-379.07322 235.50913,-379.69312 236.61162,-379.85833 C 237.75504,-380.02976 238.86821,-379.58419 242.90739,-380.29586 C 246.94627,-381.00755 248.42246,-381.33354 249.14158,-381.64616 C 249.88822,-381.97095 250.95964,-382.39191 251.61747,-383.00826 C 253.57644,-382.88355 255.42223,-383.03435 256.88227,-383.48645 C 259.90603,-384.42272 261.87384,-385.01189 263.74507,-385.76396 C 265.24645,-386.36738 266.17709,-387.30032 266.40943,-387.58279 C 266.64197,-387.86524 266.55894,-388.30268 266.58637,-388.32227 C 266.63172,-388.35443 266.93259,-388.39235 267.11563,-388.72388 C 268.1012,-390.50664 269.82518,-394.17603 269.987,-394.85126 C 270.14794,-395.52383 270.24882,-396.18904 270.32707,-396.58177 C 270.37238,-396.80868 270.24154,-397.46878 270.26767,-397.5257 C 270.30421,-397.6054 270.57272,-397.75558 270.64536,-398.00055 C 270.99449,-399.17741 270.8881,-400.19633 270.77316,-401.62545 C 270.65853,-403.05454 270.12535,-406.29655 269.53303,-407.1941 C 269.15286,-407.77056 268.79088,-408.04035 268.44277,-408.17869 C 268.37703,-408.20061 268.32242,-408.23394 268.26007,-408.2541 C 268.2394,-408.25953 268.21826,-408.25387 268.19773,-408.25845 C 267.89214,-408.35547 267.60176,-408.45912 267.05957,-408.5572 C 266.084,-408.7337 264.65883,-408.98486 263.11782,-409.08304 C 262.60416,-409.11577 262.07992,-409.10775 261.55259,-409.09835 C 257.98058,-409.03472 252.49564,-408.73725 251.24552,-408.18907 C 249.63965,-408.63604 247.34955,-409.07483 245.35499,-409.07027 C 242.28304,-409.06325 240.37552,-409.07493 238.43292,-409.0837 C 236.49041,-409.09248 236.72384,-408.81345 235.50112,-408.81852 C 234.1792,-408.82401 233.73411,-409.16569 229.9455,-409.11245 C 226.38768,-409.06243 220.91754,-408.74723 219.63844,-408.20318 C 218.0323,-408.65065 215.74477,-409.08893 213.74801,-409.08436 C 210.67586,-409.07735 208.76626,-409.05786 206.82375,-409.06662 C 206.16316,-409.06961 205.77525,-409.0156 205.47016,-408.97318 z"
1627 style="opacity:0.82448976;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1628 </clipPath>
1629 <linearGradient
1630 inkscape:collect="always"
1631 xlink:href="#linearGradient7622"
1632 id="linearGradient7708"
1633 gradientUnits="userSpaceOnUse"
1634 gradientTransform="translate(-19.091883,4.2426407)"
1635 x1="774.97668"
1636 y1="-211.87105"
1637 x2="755.11584"
1638 y2="-202.67865" />
1639 <mask
1640 maskUnits="userSpaceOnUse"
1641 id="mask7704">
1642 <path
1643 style="fill:url(#linearGradient7708);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
1644 d="M 718.40812,-224.31217 L 751.65812,-168.31217 L 1027.6581,-192.31217 L 1187.1581,-240.56217 L 1120.6581,-323.31217 L 718.40812,-224.31217 z"
1645 id="path7706" />
1646 </mask>
1647 <radialGradient
1648 inkscape:collect="always"
1649 xlink:href="#linearGradient8430"
1650 id="radialGradient7904"
1651 gradientUnits="userSpaceOnUse"
1652 gradientTransform="matrix(-0.3324832,0.9022288,-0.9582407,-0.3531242,305.29227,19.909497)"
1653 cx="142.95833"
1654 cy="107.09234"
1655 fx="142.95833"
1656 fy="107.09234"
1657 r="66.981766" />
1658 <radialGradient
1659 inkscape:collect="always"
1660 xlink:href="#linearGradient3317"
1661 id="radialGradient7906"
1662 gradientUnits="userSpaceOnUse"
1663 gradientTransform="matrix(1.0036478,-1.0345492e-7,1.7124628e-7,1.6613125,-160.53487,-96.205369)"
1664 cx="317.78754"
1665 cy="129.65378"
1666 fx="317.78754"
1667 fy="129.65378"
1668 r="47.863216" />
1669 <radialGradient
1670 inkscape:collect="always"
1671 xlink:href="#linearGradient8398"
1672 id="radialGradient7908"
1673 gradientUnits="userSpaceOnUse"
1674 gradientTransform="matrix(2.0747661,-0.1577957,0.2382425,3.1325183,-550.77432,-65.728909)"
1675 cx="325.30847"
1676 cy="80.909554"
1677 fx="325.30847"
1678 fy="80.909554"
1679 r="26.937988" />
1680 <clipPath
1681 clipPathUnits="userSpaceOnUse"
1682 id="clipPath8209">
1683 <path
1684 sodipodi:nodetypes="czcc"
1685 id="path8211"
1686 d="M 734.03125,519.49186 C 734.03125,519.49186 750.78638,556.50992 762.73266,573.44581 C 774.67895,590.3817 815.45982,629.49186 815.45982,629.49186 L 816.05699,490.90211"
1687 style="opacity:1;fill:#1a1a1a;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1688 </clipPath>
1689 <filter
1690 inkscape:collect="always"
1691 id="filter8225">
1692 <feGaussianBlur
1693 inkscape:collect="always"
1694 stdDeviation="10.661912"
1695 id="feGaussianBlur8227" />
1696 </filter>
1697 <filter
1698 inkscape:collect="always"
1699 id="filter8333">
1700 <feGaussianBlur
1701 inkscape:collect="always"
1702 stdDeviation="7.18"
1703 id="feGaussianBlur8335" />
1704 </filter>
1705 <clipPath
1706 clipPathUnits="userSpaceOnUse"
1707 id="clipPath8338">
1708 <path
1709 sodipodi:nodetypes="czzzzzzcccccccccczczz"
1710 id="path8340"
1711 d="M 266.27183,924.57185 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87513 C 271.58023,990.42751 284.75966,1019.7825 288.68798,1037.0589 C 292.61419,1054.326 291.38211,1075.3686 276.22854,1088.2071 C 260.91093,1101.1846 234.17727,1109.8061 208.39624,1103.9409 C 182.61518,1098.0756 138.84716,1054.7175 119.80605,1033.7126 C 100.6939,1012.6293 56.045182,939.86193 41.867507,909.4368 C 27.689835,879.01168 29.207902,872.71823 33.747792,863.90708 C 24.38107,839.38658 21.33408,813.84026 0.035334479,788.33044 C 30.360814,791.44487 43.915624,815.28676 60.161024,835.47019 C 54.631128,787.39416 42.106309,771.05368 31.787072,744.74589 C 61.781367,750.82754 82.366432,776.61828 95.766855,817.45839 C 105.32101,813.54047 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.0048,738.48134 89.267014,707.32725 C 142.70898,712.99757 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.426,751.28935 177.62715,712.76848 195.01526,670.98819 C 195.01526,670.98819 243.30204,736.42507 249.40491,756.79397 C 255.50779,777.16287 250.92373,795.49448 250.92373,795.49448 C 250.92373,795.49448 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82996 267.68496,905.69193 266.27183,924.57185 z"
1712 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1713 </clipPath>
1714 <filter
1715 inkscape:collect="always"
1716 id="filter8354">
1717 <feGaussianBlur
1718 inkscape:collect="always"
1719 stdDeviation="6.82"
1720 id="feGaussianBlur8356" />
1721 </filter>
1722 <clipPath
1723 clipPathUnits="userSpaceOnUse"
1724 id="clipPath8359">
1725 <path
1726 sodipodi:nodetypes="czzzzzzcccccccccczczz"
1727 id="path8361"
1728 d="M 266.27183,924.57185 C 264.86456,943.37307 265.12693,957.32289 268.35357,973.87513 C 271.58023,990.42751 284.75966,1019.7825 288.68798,1037.0589 C 292.61419,1054.326 291.38211,1075.3686 276.22854,1088.2071 C 260.91093,1101.1846 234.17727,1109.8061 208.39624,1103.9409 C 182.61518,1098.0756 138.84716,1054.7175 119.80605,1033.7126 C 100.6939,1012.6293 56.045182,939.86193 41.867507,909.4368 C 27.689835,879.01168 29.207902,872.71823 33.747792,863.90708 C 24.38107,839.38658 21.33408,813.84026 0.035334479,788.33044 C 30.360814,791.44487 43.915624,815.28676 60.161024,835.47019 C 54.631128,787.39416 42.106309,771.05368 31.787072,744.74589 C 61.781367,750.82754 82.366432,776.61828 95.766855,817.45839 C 105.32101,813.54047 114.00462,808.08545 125.95427,808.39719 C 114.65677,766.70139 108.0048,738.48134 89.267014,707.32725 C 142.70898,712.99757 172.92404,787.96657 168.23844,795.28805 C 178.21641,793.04406 187.24409,788.75767 198.67497,789.63638 C 187.426,751.28935 177.62715,712.76848 195.01526,670.98819 C 195.01526,670.98819 243.30204,736.42507 249.40491,756.79397 C 255.50779,777.16287 250.92373,795.49448 250.92373,795.49448 C 250.92373,795.49448 267.8833,826.57978 271.21765,846.58862 C 274.59075,866.82996 267.68496,905.69193 266.27183,924.57185 z"
1729 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1730 </clipPath>
1731 <filter
1732 inkscape:collect="always"
1733 id="filter8379"
1734 x="-0.14413793"
1735 width="1.288276"
1736 y="-0.10278689"
1737 height="1.2055738">
1738 <feGaussianBlur
1739 inkscape:collect="always"
1740 stdDeviation="7.389266"
1741 id="feGaussianBlur8381" />
1742 </filter>
1743 <clipPath
1744 clipPathUnits="userSpaceOnUse"
1745 id="clipPath8392">
1746 <path
1747 sodipodi:nodetypes="czzzzzzcccccccccczczz"
1748 id="path8394"
1749 d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z"
1750 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1751 </clipPath>
1752 <filter
1753 inkscape:collect="always"
1754 id="filter8404"
1755 x="-0.090268657"
1756 width="1.1805373"
1757 y="-0.10250848"
1758 height="1.205017">
1759 <feGaussianBlur
1760 inkscape:collect="always"
1761 stdDeviation="5.3457272"
1762 id="feGaussianBlur8406" />
1763 </filter>
1764 <clipPath
1765 clipPathUnits="userSpaceOnUse"
1766 id="clipPath8417">
1767 <path
1768 sodipodi:nodetypes="czzzzzzcccccccccczczz"
1769 id="path8419"
1770 d="M 760.16396,935.83377 C 766.95806,954.73656 770.65765,969.13346 772.05426,987.04566 C 773.45088,1004.958 768.27158,1038.8465 769.1538,1057.7018 C 770.03555,1076.547 777.28749,1097.8008 796.49843,1106.6707 C 815.9173,1115.6365 845.81767,1116.882 870.61827,1103.5251 C 895.41887,1090.1681 928.01929,1033.1996 941.59253,1006.2164 C 955.21638,979.13246 980.3536,891.71903 986.25333,856.44781 C 992.15306,821.1766 988.80387,815.14704 981.63585,807.39232 C 984.27615,779.55217 980.13613,752.45689 994.74554,720.20614 C 964.49653,732.03184 957.36325,760.36684 946.42665,785.71122 C 938.42574,734.77829 946.63581,714.43803 949.74554,684.49186 C 920.68078,699.26977 906.88403,731.60588 904.74554,777.349 C 893.82159,776.0448 883.3541,772.91477 871.17411,776.63471 C 870.91007,730.61137 869.71055,699.7453 880.08474,662.42822 C 826.82927,683.45508 817.13746,769.02232 824.03125,775.20614 C 813.14843,775.74114 802.66017,773.90884 791.17411,778.06329 C 791.81303,735.49194 790.91365,693.15468 761.17411,655.20614 C 761.17411,655.20614 730.21605,736.12848 729.74554,758.77757 C 729.27503,781.42666 739.19713,798.94345 739.19713,798.94345 C 739.19713,798.94345 730.62906,835.68396 732.89854,857.17568 C 735.19439,878.91714 753.34144,916.85185 760.16396,935.83377 z"
1771 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1772 </clipPath>
1773 </defs>
1774 <metadata
1775 id="metadata7">
1776 <rdf:RDF>
1777 <cc:Work
1778 rdf:about="">
1779 <dc:format>image/svg+xml</dc:format>
1780 <dc:type
1781 rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
1782 </cc:Work>
1783 </rdf:RDF>
1784 </metadata>
1785 <g
1786 inkscape:groupmode="layer"
1787 id="layer1"
1788 inkscape:label="Shadow">
1789 <path
1790 style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter11361);enable-background:new"
1791 d="M 304.64285,526.6479 C 294.64285,527.00505 286.42857,529.50504 286.42857,529.50504 L 293.92857,535.57647 L 304.28571,539.1479 L 320.35714,539.50504 L 342.85714,534.1479 L 350.71428,535.21933 L 371.07143,533.07647 L 360.71428,539.86219 C 366.17351,538.83858 378.10757,543.4313 370.35714,545.21933 C 368.61714,545.62075 384.28571,540.57648 384.28571,540.57648 L 386.78571,535.93361 L 390.35714,526.6479 L 401.78571,526.6479 L 419.99999,522.00504 L 423.57143,517.00505 L 407.49999,518.07647 L 395.35714,520.21933 L 380.71428,515.21933 L 310.02218,531.92707 L 304.64285,526.6479 z"
1792 id="path10326"
1793 sodipodi:nodetypes="cccccccccsccccccccccc"
1794 transform="matrix(10.726753,0,0,10.726753,-2882.1235,-4565.4583)"
1795 inkscape:export-filename="/home/cheeseness/Documents/LCA09/mascot/tuz_new.png"
1796 inkscape:export-xdpi="142.10527"
1797 inkscape:export-ydpi="142.10527" />
1798 </g>
1799 <g
1800 inkscape:groupmode="layer"
1801 id="layer20"
1802 inkscape:label="New Ear">
1803 <g
1804 style="opacity:1;display:inline;enable-background:new"
1805 id="g7882"
1806 transform="matrix(0.71084,-0.1937433,0.262963,0.9648058,503.68027,136.48399)">
1807 <path
1808 sodipodi:nodetypes="czzzzcc"
1809 id="path7876"
1810 d="M 245.12255,100.05344 C 245.12255,100.05344 197.99444,68.406519 177.9079,64.252501 C 157.86998,60.108538 139.435,60.934923 125.97426,77.859824 C 112.51352,94.784725 113.89687,139.12502 112.43872,164.82937 C 110.98057,190.53372 114.98817,235.00638 130.04332,253.49489 C 145.09848,271.98339 175.92966,267.07991 179.97027,274.90859 C 182.1831,279.19595 245.12255,100.05344 245.12255,100.05344 z"
1811 style="opacity:1;fill:url(#radialGradient7904);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1812 <path
1813 sodipodi:nodetypes="czzzzzc"
1814 id="path7878"
1815 d="M 135.37935,82.017807 C 135.37935,82.017807 161.7229,83.95659 173.01242,95.920995 C 184.42736,108.01833 186.74699,117.25251 188.30828,133.65558 C 189.87165,150.08057 187.45871,162.0737 180.49446,169.69292 C 173.53021,177.31214 179.49017,189.27624 154.57841,181.76399 C 129.66665,174.25174 127.54617,153.98101 128.06318,135.45924 C 128.58039,116.93026 135.37935,82.017807 135.37935,82.017807 z"
1816 style="opacity:1;fill:url(#radialGradient7906);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1817 <path
1818 sodipodi:nodetypes="czccssc"
1819 id="path7880"
1820 d="M 135.648,81.927211 C 135.648,81.927211 131.00335,98.292286 136.23625,110.49031 C 141.72419,123.28285 163.4605,154.75038 163.4605,165.14596 L 186.11675,160.14596 C 188.65893,153.17952 189.32727,144.3939 188.30425,133.64596 C 186.74296,117.24289 184.43795,108.02455 173.023,95.927211 C 163.36812,85.695164 141.42989,82.552354 135.648,81.927211 z"
1821 style="opacity:1;fill:url(#radialGradient7908);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1822 </g>
1823 </g>
1824 <g
1825 inkscape:groupmode="layer"
1826 id="layer21"
1827 inkscape:label="Rendered2"
1828 style="display:inline">
1829 <path
1830 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1831 d="M 845.03125,1154.7776 C 840.74554,1155.4919 817.4031,1158.3957 787.17411,1164.7776 C 756.94512,1171.1595 729.86016,1169.7437 651.38803,1182.1072 C 571.53625,1194.688 557.32367,1224.6495 543.26578,1229.1715 C 528.56564,1233.9001 397.88839,1163.349 397.88839,1163.349 L 402.17411,1069.0633 C 402.17411,1069.0633 488.05962,1052.8624 514.31696,1035.4919 C 540.57431,1018.1214 559.89934,985.82588 573.60268,964.06329 C 587.30602,942.3007 606.45982,892.63471 606.45982,892.63471 L 845.03125,1154.7776 z"
1832 id="path7917"
1833 sodipodi:nodetypes="czzzcczzcc" />
1834 <path
1835 style="opacity:0.5;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8888);enable-background:accumulate"
1836 d="M 332.34019,898.38549 L 299.60838,837.08593 L 261.99104,882.19239 C 264.16779,883.5095 267.76529,861.33636 307.59144,817.77531 L 332.34019,898.38549 z"
1837 id="path7919"
1838 clip-path="url(#clipPath8658)"
1839 sodipodi:nodetypes="ccccc"
1840 transform="translate(276,136)" />
1841 <path
1842 style="opacity:1;fill:url(#linearGradient2841);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8892);enable-background:accumulate"
1843 d="M 200.81833,863.03015 L 347.18943,811.41136 L 591.14127,1037.6855 L 349.31075,1177.6927 L 168.29141,1090.0114 L 200.81833,863.03015 z"
1844 id="path7923"
1845 clip-path="url(#clipPath2833)"
1846 sodipodi:nodetypes="cccccc"
1847 transform="translate(276,136)" />
1848 <path
1849 style="opacity:1;fill:#0f0f0f;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1850 d="M 642.88839,640.13471 C 642.88839,640.13471 613.33433,680.70776 595.03125,714.42042 C 576.72816,748.13309 536.41016,840.77736 524.67411,885.49185 C 512.91471,930.29529 462.17411,1009.0633 462.17411,1009.0633 L 538.24554,1027.2776 C 538.24554,1027.2776 550.05266,1014.4542 569.31696,981.20614 C 588.58126,947.95806 629.67411,842.63471 629.67411,842.63471 L 642.88839,640.13471 z"
1851 id="path7921"
1852 sodipodi:nodetypes="czzcczcc" />
1853 <path
1854 style="opacity:0.4;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8856);enable-background:accumulate"
1855 d="M 430.28131,381.94122 C 423.21025,384.76965 194.10007,414.09303 194.10007,414.09303 L 154.46046,773.92607 L 244.65895,866.56568 L 296.98485,752.01438 L 397.45289,565.62246 L 430.28131,381.94122 z"
1856 id="path7925"
1857 sodipodi:nodetypes="ccccccc"
1858 clip-path="url(#clipPath3665)"
1859 transform="translate(276,136)" />
1860 <path
1861 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1862 d="M 969.67051,1164.0346 C 969.67051,1164.0346 992.92679,1175.4283 1005.7383,1184.5107 C 1018.4357,1193.5122 1035.2107,1209.1598 1047.4307,1221.8712 C 1059.7362,1234.6714 1067.5434,1244.4699 1088.9634,1246.032 C 1110.3956,1247.5949 1142.2458,1237.2444 1162.2594,1221.3678 C 1182.2729,1205.4912 1207.9063,1152.135 1207.9063,1152.135 L 1080.7455,1009.0633"
1863 id="path7927"
1864 sodipodi:nodetypes="czzzzcc" />
1865 <path
1866 style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8860);enable-background:accumulate"
1867 d="M 331.34019,641.50471 L 216.17367,835.36467 L 260.2153,925.96265 L 357.79603,732.21539 L 331.34019,641.50471 z"
1868 id="path7929"
1869 clip-path="url(#clipPath8642)"
1870 sodipodi:nodetypes="ccccc"
1871 transform="translate(276,136)" />
1872 <g
1873 style="opacity:1;display:inline;enable-background:new"
1874 id="g7931"
1875 transform="matrix(0.9934486,0.1142802,-0.1142802,0.9934486,-9.24324,588.09054)"
1876 inkscape:transform-center-x="-347.89063"
1877 inkscape:transform-center-y="-28.255779">
1878 <path
1879 style="opacity:1;fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1880 d="M 1049.205,-282.26672 L 1049.1152,-282.25891 C 1047.7278,-281.37446 1042.5119,-280.65171 1042.4862,-272.73547 C 1042.462,-265.31022 1057.4991,-255.64401 1059.6425,-254.64172 C 1061.3727,-253.83263 1063.2341,-253.23296 1065.0488,-252.92297 L 1066.4862,-252.70422 C 1068.4059,-252.49228 1070.2062,-252.55281 1071.6425,-252.79797 C 1074.7616,-253.33037 1076.7541,-253.72658 1078.7362,-254.11047 C 1080.7183,-254.49434 1081.2105,-255.15012 1082.2988,-255.39172 C 1083.4275,-255.64238 1084.569,-255.27543 1088.5488,-256.26672 C 1092.5283,-257.258 1093.9782,-257.68602 1094.6738,-258.04797 C 1095.396,-258.42398 1096.4355,-258.91855 1097.0488,-259.57922 C 1099.0117,-259.59127 1100.8425,-259.87027 1102.2675,-260.42297 C 1105.2187,-261.56758 1107.1407,-262.29239 1108.955,-263.17297 C 1110.4107,-263.8795 1111.2741,-264.875 1111.4862,-265.17297 C 1111.6985,-265.47093 1111.5852,-265.90152 1111.6112,-265.92297 C 1111.6542,-265.95821 1111.9517,-266.017 1112.1112,-266.36047 C 1112.9702,-268.20755 1114.4344,-271.98811 1114.5488,-272.67297 C 1114.6625,-273.35512 1114.7168,-274.02574 1114.7675,-274.42297 C 1114.7969,-274.65248 1114.6204,-275.30187 1114.6425,-275.36047 C 1114.6734,-275.44252 1114.9308,-275.61104 1114.9862,-275.86047 C 1115.2525,-277.05879 1115.0754,-278.06783 1114.8612,-279.48547 C 1114.6473,-280.90311 1113.8896,-284.1001 1113.2362,-284.95422 C 1112.8168,-285.50279 1112.4369,-285.74672 1112.08,-285.86047 C 1112.0129,-285.87776 1111.9561,-285.90721 1111.8925,-285.92297 C 1111.8715,-285.92695 1111.8508,-285.91983 1111.83,-285.92297 C 1111.5184,-285.99847 1111.2215,-286.08164 1110.6738,-286.14172 C 1109.6883,-286.24984 1108.2491,-286.40112 1106.705,-286.39172 C 1106.1903,-286.38859 1105.6679,-286.34408 1105.1425,-286.29797 C 1101.5836,-285.98569 1096.1327,-285.30689 1094.9238,-284.67297 C 1093.2907,-285.00699 1090.9756,-285.2852 1088.9862,-285.14172 C 1085.9222,-284.92075 1084.0185,-284.79953 1082.08,-284.67297 C 1080.1416,-284.54642 1080.3939,-284.28433 1079.1738,-284.20422 C 1077.8547,-284.11762 1077.3869,-284.42747 1073.6112,-284.11047 C 1070.0655,-283.81275 1064.6306,-283.1173 1063.3925,-282.48547 C 1061.7591,-282.81998 1059.4466,-283.09786 1057.455,-282.95422 C 1054.3908,-282.73324 1052.4872,-282.58078 1050.5488,-282.45422 C 1049.8896,-282.41119 1049.5064,-282.33029 1049.205,-282.26672 z"
1881 id="path7933" />
1882 <g
1883 clip-path="url(#clipPath7616)"
1884 style="display:inline;filter:url(#filter7610);enable-background:new"
1885 id="g7935"
1886 transform="matrix(0.9975712,-6.9654277e-2,6.9654277e-2,0.9975712,872.72062,140.02502)">
1887 <path
1888 sodipodi:nodetypes="ccssscsssscscsscsssccscssccsscssscc"
1889 id="path7937"
1890 d="M 229.94262,-409.12268 C 226.38481,-409.07267 220.91842,-408.76259 219.63928,-408.21854 C 218.03319,-408.66601 215.73612,-409.09985 213.73933,-409.09528 C 210.66734,-409.08826 208.77464,-409.08651 206.83206,-409.09528 C 206.17159,-409.09827 205.78447,-409.02811 205.47939,-408.98569 C 205.47939,-408.98569 205.47939,-407.88976 205.47939,-407.88976 C 205.59911,-408.06923 205.87191,-408.58022 206.42914,-408.65691 C 207.17672,-408.7598 211.59842,-408.80814 213.73933,-408.76651 C 215.51393,-408.73198 218.19456,-408.49224 220.12854,-407.80756 C 220.44994,-407.69378 220.74779,-407.53378 221.02073,-407.39659 C 222.98415,-406.40966 228.96409,-403.09505 228.96409,-403.09505 C 228.96409,-403.09505 222.33134,-407.04273 221.48122,-407.53358 C 221.27791,-407.65097 220.90658,-407.79127 220.44513,-407.94456 C 221.66576,-408.39235 225.5211,-408.56427 228.27336,-408.65691 C 231.29786,-408.75873 231.62112,-408.7465 233.68405,-408.46512 C 235.81336,-408.17469 237.02256,-407.86236 237.02256,-407.86236 C 237.02255,-407.86236 236.9442,-408.50354 238.05865,-408.65691 C 238.80622,-408.7598 243.22794,-408.80814 245.36884,-408.76651 C 247.43834,-408.72625 250.73489,-408.35935 252.65024,-407.39659 C 253.65356,-406.89226 255.68588,-405.82796 257.44559,-404.86088 L 257.5412,-404.88031 C 257.5412,-404.88031 253.96086,-407.04273 253.11073,-407.53358 C 252.90742,-407.65097 252.5361,-407.79127 252.07464,-407.94456 C 253.29526,-408.39235 257.12183,-408.56427 259.87409,-408.65691 C 262.89859,-408.75873 263.22184,-408.7465 265.28478,-408.46512 C 267.23794,-408.19872 268.2977,-407.93506 268.47939,-407.88976 C 268.47939,-407.88976 268.4523,-408.20122 268.4523,-408.20122 C 268.04327,-408.33767 267.73806,-408.43457 267.05192,-408.5587 C 265.75111,-408.79403 263.6528,-409.16026 261.54335,-409.12268 C 257.9714,-409.05904 252.49007,-408.76672 251.24001,-408.21854 C 249.63418,-408.66549 247.36339,-409.09984 245.36884,-409.09528 C 242.29685,-409.08826 240.37536,-409.08651 238.43279,-409.09528 C 236.49023,-409.10406 236.72011,-408.81621 235.49721,-408.8213 C 234.1753,-408.8268 233.73109,-409.17593 229.94262,-409.12268 C 229.94262,-409.12268 229.94262,-409.12268 229.94262,-409.12268"
1891 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1892 <path
1893 id="path7939"
1894 d="M 206.1989,-407.47878 C 208.11911,-406.66172 210.77605,-405.28595 212.35787,-404.08139 C 213.93971,-402.87683 215.26544,-402.30771 217.91246,-400.16344 C 218.79803,-399.44606 219.66111,-398.81359 220.50439,-398.2417 L 221.04496,-398.43181 C 220.33173,-398.9152 219.5772,-399.45212 218.77587,-400.05384 C 215.95364,-402.17305 215.14932,-402.86357 212.7608,-404.32798 C 210.37226,-405.79238 208.66132,-406.69374 206.1989,-407.47878 C 206.1989,-407.47878 206.1989,-407.47878 206.1989,-407.47878"
1895 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1896 sodipodi:nodetypes="cssccsscc" />
1897 <path
1898 id="path7941"
1899 d="M 237.79963,-407.47878 C 239.71984,-406.66172 242.40557,-405.28595 243.98738,-404.08139 C 244.80045,-403.46223 245.54587,-403.01097 246.43784,-402.42738 L 247.08684,-402.54404 C 246.28853,-403.12041 245.51507,-403.63839 244.39031,-404.32798 C 242.00177,-405.79238 240.26205,-406.69374 237.79963,-407.47878 C 237.79963,-407.47878 237.79963,-407.47878 237.79963,-407.47878"
1900 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1901 sodipodi:nodetypes="csccscc" />
1902 </g>
1903 <g
1904 clip-path="url(#clipPath7606)"
1905 id="g7943">
1906 <path
1907 style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7578);enable-background:new"
1908 d="M 1056.25,-278.80481 C 1060.3946,-280.28358 1066.25,-275.67981 1066.25,-275.67981 C 1067.149,-275.39889 1068.9751,-276.57428 1068.8743,-277.36595 C 1068.8743,-277.36595 1067.324,-279.22657 1068.5,-280.30481 C 1069.676,-281.38305 1073.796,-278.79743 1076,-278.67981 C 1078.204,-278.56219 1081.5621,-278.90922 1083,-279.42981 C 1084.4379,-279.9504 1084.1129,-280.8544 1085.625,-281.17981 C 1087.1371,-281.50522 1090.7439,-280.14227 1092.6855,-280.01098 C 1094.6271,-279.87969 1097.3336,-279.67671 1098.5,-280.17981 C 1099.6664,-280.68291 1098.6782,-281.33902 1100.375,-282.05481 C 1102.0718,-282.7706 1108.1352,-283.01143 1110,-282.17981 C 1111.8648,-281.34819 1111.8099,-281.66061 1112.625,-279.17981 C 1113.4401,-276.69901 1120.0648,-274.01696 1111.5,-265.80481 C 1102.9352,-257.59266 1052.1221,-252.01887 1045.875,-263.05481 C 1039.6279,-274.09075 1052.1054,-277.32604 1056.25,-278.80481 z"
1909 id="path7945"
1910 sodipodi:nodetypes="czzzzzzzzzzzzzz" />
1911 <path
1912 style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7594);enable-background:new"
1913 d="M 1058.5,-275.42981 C 1062.6446,-276.90858 1068.5,-272.30481 1068.5,-272.30481 C 1069.399,-272.02389 1071.2251,-273.19928 1071.1243,-273.99095 C 1071.1243,-273.99095 1069.574,-275.85157 1070.75,-276.92981 C 1071.926,-278.00805 1076.046,-275.42243 1078.25,-275.30481 C 1080.454,-275.18719 1083.8121,-275.53422 1085.25,-276.05481 C 1086.6879,-276.5754 1086.3629,-277.4794 1087.875,-277.80481 C 1089.3871,-278.13022 1092.9939,-276.76727 1094.9355,-276.63598 C 1096.8771,-276.50469 1099.5836,-276.30171 1100.75,-276.80481 C 1101.9164,-277.30791 1100.9282,-277.96402 1102.625,-278.67981 C 1104.3218,-279.3956 1110.3852,-279.63643 1112.25,-278.80481 C 1114.1148,-277.97319 1114.0599,-278.28561 1114.875,-275.80481 C 1115.6901,-273.32401 1122.3148,-270.64196 1113.75,-262.42981 C 1105.1852,-254.21766 1054.3721,-248.64387 1048.125,-259.67981 C 1041.8779,-270.71575 1054.3554,-273.95104 1058.5,-275.42981 z"
1914 id="path7947"
1915 sodipodi:nodetypes="czzzzzzzzzzzzzz" />
1916 </g>
1917 </g>
1918 <path
1919 style="opacity:1;fill:#101414;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1920 d="M 628.24553,347.99185 C 624.4411,322.72762 611.43581,297.35383 611.08873,272.46662 C 610.90247,259.1111 614.36158,245.89571 624.84426,232.91257 C 661.19128,167.61674 741.78517,148.21789 810.77892,141.44715 C 897.70131,130.43035 995.69159,159.38722 1044.1503,236.84843 C 1098.2743,312.58173 1100.825,409.38755 1124.7623,496.37638 C 1154.2001,623.50398 1179.5414,752.59052 1185.1545,883.22673 C 1182.0911,961.40858 1176.7282,1048.4109 1124.6513,1111.3609 C 1076.6248,1161.7183 1001.8649,1161.4139 937.58145,1170.3632 C 847.02606,1175.0182 753.22992,1154.2174 675.79755,1106.165 C 611.02191,1068.225 580.06736,992.68633 578.51815,920.14538 C 570.12898,840.27022 604.90967,766.33687 630.13855,692.98577 C 637.60916,610.2247 639.55141,526.73802 639.79189,443.60093 C 638.95507,411.40549 632.70236,379.7836 628.24553,347.99185 z"
1921 id="path7949"
1922 sodipodi:nodetypes="cscccccccccccc" />
1923 <path
1924 style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8940);enable-background:accumulate"
1925 d="M 311.83409,415.43155 L 321.73359,537.05392 L 261.62951,673.52553 L 277.18586,848.1809 C 292.79912,910.0601 309.37131,946.84995 351.56201,965.23473 C 355.88112,928.99475 312.95049,822.27485 312.31937,776.11489 C 311.68792,729.93044 323.14971,667.50703 342.99704,617.81842 C 363.04539,567.62654 379.89378,572.972 385.12193,525.22549 C 390.35008,477.47898 367.69553,375.83357 367.69553,375.83357 L 311.83409,415.43155 z"
1926 id="path7951"
1927 sodipodi:nodetypes="ccccczzzcc"
1928 clip-path="url(#clipPath8616)"
1929 transform="translate(276,136)" />
1930 <path
1931 style="opacity:1;fill:url(#linearGradient8970);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
1932 d="M 1010.0312,655.49186 C 1010.0312,655.49186 1026.7864,692.50992 1038.7327,709.44581 C 1050.6789,726.3817 1091.4598,765.49186 1091.4598,765.49186 L 1144.057,637.90211"
1933 id="path7953"
1934 sodipodi:nodetypes="czcc" />
1935 <path
1936 style="opacity:0.07999998;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8822);enable-background:accumulate"
1937 d="M 730.31998,536.56864 C 730.31998,545.05392 772.86772,595.03667 772.86772,595.03667 L 785.47431,566.26713 L 730.31998,536.56864 z"
1938 id="path7955"
1939 sodipodi:nodetypes="cccc"
1940 clip-path="url(#clipPath8209)"
1941 transform="translate(276,136)" />
1942 <g
1943 transform="translate(450.03125,73.843964)"
1944 style="opacity:1;display:inline;enable-background:new"
1945 id="g7957"
1946 clip-path="url(#clipPath3998)">
1947 <g
1948 transform="translate(-174.03125,62.156036)"
1949 style="filter:url(#filter3677)"
1950 id="g7959">
1951 <g
1952 id="g7961"
1953 style="filter:url(#filter3785)">
1954 <path
1955 sodipodi:nodetypes="czzzzzzzzzz"
1956 id="path7963"
1957 d="M 425.88244,476.99186 C 436.68787,475.5132 450.62645,480.34637 470.5253,480.20614 C 490.42415,480.06591 527.97852,463.29492 552.66815,463.06328 C 577.35778,462.83164 615.41985,475.34734 631.95387,478.06328 C 648.48789,480.77922 654.80219,477.90476 659.45386,485.92043 C 664.10553,493.9361 661.38057,496.66767 649.09672,506.63472 C 636.81287,516.60177 608.30704,519.27104 583.02529,519.49186 C 557.74295,519.71268 512.644,526.57038 487.66815,523.42042 C 462.6923,520.27046 430.73059,515.59775 418.73958,505.56328 C 406.74857,495.52881 398.88874,488.83146 401.23958,481.63471 C 403.59042,474.43796 415.07701,478.47052 425.88244,476.99186 z"
1958 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1959 <rect
1960 y="412.60312"
1961 x="343.6539"
1962 height="181.01935"
1963 width="381.83765"
1964 id="rect7965"
1965 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1966 </g>
1967 <g
1968 id="g7967"
1969 style="filter:url(#filter3785)">
1970 <path
1971 sodipodi:nodetypes="czzzcc"
1972 id="path7969"
1973 d="M 687.14286,452.36218 C 676.68117,462.07661 600.16326,471.36732 586.42857,481.6479 C 572.69388,491.92848 571.67605,494.53616 574.28571,501.6479 C 576.89537,508.75964 580.83098,511.05362 600,510.21932 C 619.16902,509.38502 698.57143,482.5976 698.57143,488.79075 L 687.14286,452.36218 z"
1974 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
1975 transform="translate(174.03125,-62.156036)" />
1976 <rect
1977 y="344.82138"
1978 x="702.86414"
1979 height="162.63455"
1980 width="207.8894"
1981 id="rect7971"
1982 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
1983 </g>
1984 </g>
1985 <g
1986 transform="translate(-174.03125,62.156036)"
1987 style="opacity:0.18000004;display:inline;enable-background:new"
1988 id="g7973">
1989 <g
1990 id="g7975"
1991 style="filter:url(#filter3785)">
1992 <path
1993 sodipodi:nodetypes="czzzzzzzzzz"
1994 id="path7977"
1995 d="M 425.88244,476.99186 C 436.68787,475.5132 450.62645,480.34637 470.5253,480.20614 C 490.42415,480.06591 527.97852,463.29492 552.66815,463.06328 C 577.35778,462.83164 615.41985,475.34734 631.95387,478.06328 C 648.48789,480.77922 654.80219,477.90476 659.45386,485.92043 C 664.10553,493.9361 661.38057,496.66767 649.09672,506.63472 C 636.81287,516.60177 608.30704,519.27104 583.02529,519.49186 C 557.74295,519.71268 512.644,526.57038 487.66815,523.42042 C 462.6923,520.27046 430.73059,515.59775 418.73958,505.56328 C 406.74857,495.52881 398.88874,488.83146 401.23958,481.63471 C 403.59042,474.43796 415.07701,478.47052 425.88244,476.99186 z"
1996 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
1997 <rect
1998 y="412.60312"
1999 x="343.6539"
2000 height="181.01935"
2001 width="381.83765"
2002 id="rect7979"
2003 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2004 </g>
2005 <g
2006 id="g7981"
2007 style="filter:url(#filter3785)">
2008 <path
2009 sodipodi:nodetypes="czzzcc"
2010 id="path7983"
2011 d="M 687.14286,452.36218 C 676.68117,462.07661 600.16326,471.36732 586.42857,481.6479 C 572.69388,491.92848 571.67605,494.53616 574.28571,501.6479 C 576.89537,508.75964 580.83098,511.05362 600,510.21932 C 619.16902,509.38502 698.57143,482.5976 698.57143,488.79075 L 687.14286,452.36218 z"
2012 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2013 transform="translate(174.03125,-62.156036)" />
2014 <rect
2015 y="344.82138"
2016 x="702.86414"
2017 height="162.63455"
2018 width="207.8894"
2019 id="rect7985"
2020 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2021 </g>
2022 </g>
2023 </g>
2024 <path
2025 style="opacity:0.75;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8802);enable-background:accumulate"
2026 d="M 582.65599,-7.4183011 L 695.79307,78.848726 L 804.68752,337.64981 L 842.87128,545.5392 L 963.07944,637.46308 C 963.07944,637.46308 950.35151,350.37773 943.28044,323.50767 C 936.20938,296.63761 793.37381,-69.643698 793.37381,-69.643698 L 582.65599,-7.4183011 z"
2027 id="path7987"
2028 clip-path="url(#clipPath8604)"
2029 sodipodi:nodetypes="cccccscc"
2030 transform="translate(276,136)" />
2031 <path
2032 style="opacity:1;fill:url(#linearGradient8958);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2033 d="M 964.13839,239.599 C 964.13839,239.599 972.81571,250.49562 988.24554,251.56328 C 1003.6754,252.63094 1037.9672,211.61061 1058.4241,199.42043 C 1078.9034,187.2169 1105.4705,172.81818 1122.3527,179.06329 C 1139.2348,185.30839 1144.5105,205.49938 1150.2098,227.099 C 1155.9092,248.69861 1156.9284,288.91289 1147.5313,319.95615 C 1138.1341,350.9994 1097.028,393.0599 1082.1741,423.349 C 1067.3202,453.6381 1070.567,463.17043 1070.567,463.17043"
2034 id="path7989"
2035 sodipodi:nodetypes="czzzzzzc" />
2036 <path
2037 style="opacity:1;fill:url(#radialGradient3315);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2038 d="M 1124.4955,207.63471 C 1108.6027,206.74185 1074.7767,219.74054 1058.4241,231.92043 C 1041.9855,244.16433 1029.2032,256.03483 1029.1384,284.06328 C 1029.0732,312.26932 1042.2575,323.13969 1058.2455,331.02757 C 1074.2335,338.91546 1091.9317,338.14685 1110.2098,319.24186 C 1128.488,300.33686 1124.4955,207.63471 1124.4955,207.63471 z"
2039 id="path7991"
2040 sodipodi:nodetypes="czzzzc" />
2041 <path
2042 sodipodi:type="arc"
2043 style="opacity:0.75;fill:url(#radialGradient3543);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4120);enable-background:accumulate"
2044 id="path7993"
2045 sodipodi:cx="385"
2046 sodipodi:cy="237.00504"
2047 sodipodi:rx="86.428574"
2048 sodipodi:ry="73.928574"
2049 d="M 471.42857,237.00504 A 86.428574,73.928574 0 1 1 298.57143,237.00504 A 86.428574,73.928574 0 1 1 471.42857,237.00504 z"
2050 transform="matrix(0.9434749,-0.1239943,0.1440089,1.0957669,451.94827,134.5988)"
2051 clip-path="url(#clipPath4100)" />
2052 <path
2053 transform="translate(450.03125,73.843964)"
2054 style="opacity:1;fill:url(#radialGradient3915);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2055 d="M 527.60588,407.44884 C 527.60588,407.44884 405.56444,445.85232 340.09154,417.08065 C 274.61865,388.30899 265.71429,292.36218 265.71429,292.36218 C 265.71429,292.36218 339.09587,211.85825 395.63507,208.74742 C 451.46212,205.67578 486.20893,228.89074 510.50508,274.59913 C 534.85708,320.41261 527.60588,407.44884 527.60588,407.44884 z"
2056 id="path7995"
2057 sodipodi:nodetypes="csczzc"
2058 mask="url(#mask3684)" />
2059 <path
2060 style="opacity:1;fill:url(#linearGradient3959);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2061 d="M 772.17411,393.349 C 772.17411,393.349 808.39165,365.96653 823.78125,357.45614 C 838.95859,349.06313 849.49553,345.849 859.6741,345.849 L 844.13839,412.81328"
2062 id="path7997"
2063 sodipodi:nodetypes="czcc" />
2064 <path
2065 sodipodi:type="arc"
2066 style="opacity:1;fill:url(#radialGradient3933);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2067 id="path7999"
2068 sodipodi:cx="409.28571"
2069 sodipodi:cy="306.64789"
2070 sodipodi:rx="36.25"
2071 sodipodi:ry="36.25"
2072 d="M 445.53571,306.64789 A 36.25,36.25 0 1 1 373.03571,306.64789 A 36.25,36.25 0 1 1 445.53571,306.64789 z"
2073 transform="translate(449.49554,74.915393)" />
2074 <path
2075 style="opacity:0.3;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8806);enable-background:accumulate"
2076 d="M 311.83409,415.43155 L 321.73359,537.05392 L 261.62951,673.52553 L 277.18586,848.1809 C 292.79912,910.0601 309.37131,946.84995 351.56201,965.23473 C 355.88112,928.99475 360.24362,892.86709 320.31937,742.11489 L 337.99704,672.81842 L 410.12193,534.22549 L 367.69553,375.83357 L 311.83409,415.43155 z"
2077 id="path8001"
2078 sodipodi:nodetypes="cccccccccc"
2079 clip-path="url(#clipPath8616)"
2080 transform="translate(276,136)" />
2081 <path
2082 style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8826);enable-background:accumulate"
2083 d="M 635.21025,581.13004 C 621.06811,593.85796 674.44372,615.71019 711.57778,605.17167 C 748.71184,594.63315 816.22265,569.6073 814.81537,525.97571 C 813.40809,482.34413 738.44784,397.28228 738.44784,397.28228 L 635.21025,581.13004 z"
2084 id="path8003"
2085 sodipodi:nodetypes="czzcc" />
2086 <path
2087 sodipodi:type="arc"
2088 style="opacity:1;fill:url(#radialGradient3991);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2089 id="path8005"
2090 sodipodi:cx="410"
2091 sodipodi:cy="306.64789"
2092 sodipodi:rx="23.214285"
2093 sodipodi:ry="23.214285"
2094 d="M 433.21428,306.64789 A 23.214285,23.214285 0 1 1 386.78572,306.64789 A 23.214285,23.214285 0 1 1 433.21428,306.64789 z"
2095 transform="translate(449.67411,74.915393)" />
2096 <path
2097 sodipodi:type="arc"
2098 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3981);enable-background:accumulate"
2099 id="path8007"
2100 sodipodi:cx="414.28571"
2101 sodipodi:cy="303.07648"
2102 sodipodi:rx="7.5"
2103 sodipodi:ry="7.5"
2104 d="M 421.78571,303.07648 A 7.5,7.5 0 1 1 406.78571,303.07648 A 7.5,7.5 0 1 1 421.78571,303.07648 z"
2105 transform="translate(451.99554,73.486821)" />
2106 <path
2107 style="opacity:1;fill:url(#radialGradient4112);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2108 d="M 789.31696,478.349 C 789.31696,478.349 796.33977,497.91759 788.24553,513.349 C 780.15129,528.78041 745.92236,552.33722 720.74554,563.349 C 695.43582,574.41891 635.27254,596.31293 618.95982,605.31328 C 602.49834,614.39571 600.74554,617.99185 600.74554,617.99185 C 600.74554,617.99185 593.59861,598.92744 629.49553,566.20615 C 665.66764,533.23401 771.52265,518.15665 789.31696,478.349 z"
2109 id="path8009"
2110 sodipodi:nodetypes="czzzczc" />
2111 <g
2112 style="opacity:1;display:inline;enable-background:new"
2113 id="g8011"
2114 transform="translate(780.74553,74.55825)">
2115 <path
2116 transform="translate(-329.81481,0)"
2117 clip-path="url(#clipPath3999)"
2118 sodipodi:nodetypes="czzczzzszc"
2119 id="path8013"
2120 d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 335.34872,345.57268 330.35714,331.29075 C 325.36556,317.00882 329.12051,327.91101 328.41112,326.19774 C 317.72184,300.38182 294.1968,271.76744 263.92857,261.6479 C 233.66034,251.52836 198.91081,256.79953 179.64286,267.36218 z"
2121 style="opacity:1;fill:url(#radialGradient3585);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline" />
2122 <path
2123 transform="matrix(0.8823874,0.4705236,-0.4705236,0.8823874,-166.62245,2.387362)"
2124 d="M 248.28731,338.07648 A 64.715881,134.00607 0 1 1 118.85555,338.07648 A 64.715881,134.00607 0 1 1 248.28731,338.07648 z"
2125 sodipodi:ry="134.00607"
2126 sodipodi:rx="64.715881"
2127 sodipodi:cy="338.07648"
2128 sodipodi:cx="183.57143"
2129 id="path8015"
2130 style="opacity:1;fill:url(#radialGradient4060);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2131 sodipodi:type="arc" />
2132 <path
2133 transform="matrix(0.8823874,0.4705236,-0.4705236,0.8823874,-162.19388,-18.755495)"
2134 d="M 248.28731,338.07648 A 64.715881,134.00607 0 1 1 118.85555,338.07648 A 64.715881,134.00607 0 1 1 248.28731,338.07648 z"
2135 sodipodi:ry="134.00607"
2136 sodipodi:rx="64.715881"
2137 sodipodi:cy="338.07648"
2138 sodipodi:cx="183.57143"
2139 id="path8017"
2140 style="opacity:1;fill:url(#radialGradient4062);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2141 sodipodi:type="arc" />
2142 <path
2143 transform="translate(-329.81481,3e-7)"
2144 clip-path="url(#clipPath3999)"
2145 sodipodi:nodetypes="czzczzzszc"
2146 id="path8019"
2147 d="M 179.64286,267.36218 C 157.23242,307.0651 119.02676,383.14247 110.35715,417.00504 C 101.70994,450.78014 101.58516,483.42158 110,503.43362 C 118.3602,523.31575 136.16398,539.06642 150.71428,544.86218 C 150.1179,530.48631 165.08723,501.57635 223.57143,472.36218 C 282.1977,443.07704 301.95306,445.23132 327.14285,425.21932 C 352.77291,404.85756 335.34872,345.57268 330.35714,331.29075 C 325.36556,317.00882 329.12051,327.91101 328.41112,326.19774 C 317.72184,300.38182 294.1968,271.76744 263.92857,261.6479 C 233.66034,251.52836 198.91081,256.79953 179.64286,267.36218 z"
2148 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient3587);stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4079);enable-background:new" />
2149 </g>
2150 <path
2151 sodipodi:type="arc"
2152 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2153 id="path8021"
2154 sodipodi:cx="310.71429"
2155 sodipodi:cy="398.07648"
2156 sodipodi:rx="19.704132"
2157 sodipodi:ry="19.704132"
2158 d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z"
2159 transform="translate(452.55663,72.581273)" />
2160 <path
2161 sodipodi:type="arc"
2162 style="opacity:1;fill:url(#radialGradient4056);fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4082);stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4083);enable-background:accumulate"
2163 id="path8023"
2164 sodipodi:cx="310.71429"
2165 sodipodi:cy="398.07648"
2166 sodipodi:rx="19.704132"
2167 sodipodi:ry="19.704132"
2168 d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z"
2169 transform="translate(450.55663,72.581273)" />
2170 <path
2171 sodipodi:type="arc"
2172 style="opacity:1;fill:url(#radialGradient4119);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2173 id="path8025"
2174 sodipodi:cx="310.71429"
2175 sodipodi:cy="398.07648"
2176 sodipodi:rx="19.704132"
2177 sodipodi:ry="19.704132"
2178 d="M 330.41843,398.07648 A 19.704132,19.704132 0 1 1 291.01016,398.07648 A 19.704132,19.704132 0 1 1 330.41843,398.07648 z"
2179 transform="translate(450.55663,72.581273)" />
2180 <path
2181 sodipodi:type="arc"
2182 style="opacity:1;fill:url(#radialGradient4868);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.99999994px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4002);enable-background:accumulate"
2183 id="path8027"
2184 sodipodi:cx="429.56738"
2185 sodipodi:cy="377.42877"
2186 sodipodi:rx="72.079735"
2187 sodipodi:ry="44.547726"
2188 d="M 501.64712,377.42877 A 72.079735,44.547726 0 1 1 357.48765,377.42877 A 72.079735,44.547726 0 1 1 501.64712,377.42877 z"
2189 transform="matrix(0.9969564,-7.7961675e-2,7.7961675e-2,0.9969564,436.61877,125.29509)"
2190 inkscape:transform-center-x="-47.231976"
2191 inkscape:transform-center-y="-3.6935079" />
2192 <path
2193 sodipodi:type="arc"
2194 style="opacity:1;fill:url(#radialGradient4876);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.99999994px;stroke-linecap:butt;stroke-linejoin:bevel;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4010);enable-background:accumulate"
2195 id="path8029"
2196 sodipodi:cx="437.6991"
2197 sodipodi:cy="391.21735"
2198 sodipodi:rx="36.611931"
2199 sodipodi:ry="22.627417"
2200 d="M 474.31103,391.21735 A 36.611931,22.627417 0 1 1 401.08717,391.21735 A 36.611931,22.627417 0 1 1 474.31103,391.21735 z"
2201 transform="matrix(1.4357951,-6.9991037e-2,6.9991037e-2,1.4357951,235.18065,-63.86546)"
2202 inkscape:transform-center-x="-20.955902"
2203 inkscape:transform-center-y="-13.056625" />
2204 <g
2205 transform="translate(450.03125,73.843964)"
2206 id="g8031"
2207 style="opacity:1;display:inline;filter:url(#filter4053);enable-background:new">
2208 <path
2209 d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z"
2210 sodipodi:ry="3.2142856"
2211 sodipodi:rx="3.2142856"
2212 sodipodi:cy="401.82648"
2213 sodipodi:cx="413.66071"
2214 id="path8033"
2215 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4484);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2216 sodipodi:type="arc" />
2217 <path
2218 transform="translate(13.125009,8.1249913)"
2219 d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z"
2220 sodipodi:ry="3.2142856"
2221 sodipodi:rx="3.2142856"
2222 sodipodi:cy="401.82648"
2223 sodipodi:cx="413.66071"
2224 id="path8035"
2225 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4486);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2226 sodipodi:type="arc" />
2227 <path
2228 transform="translate(32.946437,7.4999913)"
2229 d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z"
2230 sodipodi:ry="3.2142856"
2231 sodipodi:rx="3.2142856"
2232 sodipodi:cy="401.82648"
2233 sodipodi:cx="413.66071"
2234 id="path8037"
2235 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4488);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2236 sodipodi:type="arc" />
2237 <path
2238 transform="translate(24.910723,-10.267866)"
2239 d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z"
2240 sodipodi:ry="3.2142856"
2241 sodipodi:rx="3.2142856"
2242 sodipodi:cy="401.82648"
2243 sodipodi:cx="413.66071"
2244 id="path8039"
2245 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4490);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2246 sodipodi:type="arc" />
2247 <path
2248 transform="translate(47.589294,-0.6250087)"
2249 d="M 416.87499,401.82648 A 3.2142856,3.2142856 0 1 1 410.44642,401.82648 A 3.2142856,3.2142856 0 1 1 416.87499,401.82648 z"
2250 sodipodi:ry="3.2142856"
2251 sodipodi:rx="3.2142856"
2252 sodipodi:cy="401.82648"
2253 sodipodi:cx="413.66071"
2254 id="path8041"
2255 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient4492);stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2256 sodipodi:type="arc" />
2257 </g>
2258 <path
2259 style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2260 d="M 896.20301,482.92837 C 897.1881,487.27845 900.74008,489.10785 903.58974,490.82019 C 908.05042,493.33311 910.1099,492.3423 912.74425,490.06258 C 914.3462,488.14141 923.42736,485.36393 928.33848,482.99151 C 932.66809,481.5326 937.24178,477.63278 941.723,474.65775 C 945.11814,473.03051 947.06964,475.01239 949.55168,475.6679 C 952.4958,476.38451 953.96285,477.83965 955.6126,479.20344 C 958.00876,480.37863 954.6847,482.34657 958.8956,483.49658 C 960.08651,483.71452 961.31255,484.07303 962.17859,482.99151"
2261 id="path8043"
2262 sodipodi:nodetypes="ccccccccc" />
2263 <path
2264 style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2265 d="M 910.85021,475.35223 C 913.16515,475.32025 914.02799,475.99476 916.34292,474.53148 C 919.79856,471.45035 921.74546,471.38671 924.29787,470.11206 C 927.32444,468.79683 930.83357,478.26375 934.3994,479.96105 C 936.79449,479.13963 935.68854,481.75484 935.85149,482.6127 C 935.90862,485.25954 938.65843,486.29076 940.20777,488.04227 C 943.52381,490.29776 947.583,494.33773 951.31945,493.34557 C 957.7647,490.4145 961.59867,492.06411 967.60816,485.95883 C 968.31221,484.77749 967.02391,479.06423 970.70175,478.76149 C 973.22574,479.01487 974.86842,478.81164 976.76267,479.32971 C 982.20367,481.4469 984.50045,485.77971 991.47301,487.28466 C 997.65591,488.25105 999.08565,491.07892 1005.3626,492.33542"
2266 id="path8045"
2267 sodipodi:nodetypes="cccccccccccc" />
2268 <path
2269 style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2270 d="M 876.98133,483.52197 C 879.37991,482.72817 883.08746,487.71407 885.15446,490.56765 C 885.74727,493.24717 886.30823,496.0541 885.91207,502.68948 C 886.6972,505.10702 888.59256,505.72043 890.7103,505.97248 C 893.82775,505.4357 896.58699,504.64818 898.0339,502.94202 C 899.9055,501.00035 903.34643,505.33596 906.11512,506.98263 C 909.72521,508.89472 913.8889,508.96149 917.98442,509.25547 C 919.688,509.02483 920.35482,513.77062 921.26741,517.3367 C 921.65155,521.71476 920.38197,524.23239 919.49965,527.18568 C 919.20535,529.68223 922.48815,530.71542 925.8131,531.73137 C 928.99554,532.47261 932.35734,533.39321 934.90447,533.49914 C 940.04633,534.37405 942.99321,536.18966 947.0263,537.53975 C 949.26544,538.3563 950.28649,539.78191 951.57199,541.07528"
2271 id="path8047"
2272 sodipodi:nodetypes="ccccccccccccc" />
2273 <path
2274 style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter8814);enable-background:new"
2275 d="M 332,187.69519 C 332,187.69519 389.5,162.19519 389.5,159.69519 C 389.5,157.19519 395,107.69519 395,107.69519 C 395,107.69519 486,59.195189 486.5,57.195189 C 487,55.195189 572.5,-4.8048114 572.5,-4.8048114 L 386.5,17.195189 L 311,123.19519 L 332,187.69519 z"
2276 id="path8049"
2277 clip-path="url(#clipPath8514)"
2278 transform="translate(276,136)" />
2279 <path
2280 style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2281 d="M 1697.2846,722.5514 C 1697.2846,722.5514 1581.3191,796.0905 1574.2481,800.33314 C 1567.177,804.57578 1343.7312,937.51186 1343.7312,937.51186 L 1347.9739,977.10984 L 1564.3486,876.70067 L 1681.7283,774.8773 L 1697.2846,722.5514 z"
2282 id="path8051" />
2283 <path
2284 style="opacity:0.5;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8810);enable-background:accumulate"
2285 d="M 528.91587,556.85291 C 523.25902,555.4387 347.89654,631.80623 347.89654,631.80623 L 313.95541,812.82557 L 365.05087,1006.7738 L 622.25397,1074.4551 C 622.25397,1074.4551 828.72915,1227.1901 834.386,1222.9475 C 840.04286,1218.7049 1002.6774,1029.2002 1002.6774,1029.2002 L 842.87128,845.35248 L 796.20224,667.16157 L 528.91587,556.85291 z"
2286 id="path8053"
2287 clip-path="url(#clipPath8610)"
2288 sodipodi:nodetypes="cccccscccc"
2289 transform="translate(276,136)" />
2290 <path
2291 style="opacity:1;fill:#0c0c0c;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2292 d="M 1097.6433,613.88997 C 1097.6433,613.88997 1120.2628,607.38316 1133.386,608.01724 C 1146.5093,608.65133 1164.0276,609.95586 1177.0949,620.20343 C 1190.1622,630.45099 1202.1626,647.3435 1211.2073,678.57308 C 1220.2519,709.80266 1212.9056,777.82509 1205.0312,821.92043 C 1197.1569,866.01577 1176.7661,928.03341 1160.0312,961.92043 C 1143.2964,995.80745 1110.2335,1039.4156 1099.4618,1051.7966 C 1088.0976,1064.8586 1043.2559,1088.2228 1020.0312,1094.0633 C 1025.3346,1083.4567 1068.931,1043.4744 1055.0312,1033.349 C 1041.0123,1023.1367 1009.2712,1079.3314 970.7381,1062.3822 C 992.12041,1049.2501 1012.5175,1011.1961 1004.7787,995.78772 C 996.93846,980.17733 974.07378,1044.5453 911.24317,1032.8006 C 941.29521,1005.2739 966.65023,961.89659 952.50587,949.8209 C 938.09071,937.51403 892.04412,1004.1141 892.04412,1004.1141 C 892.04412,1004.1141 889.22222,962.41287 905.81732,935.50673 C 922.45667,908.52886 985.47029,853.89146 1005.3704,823.80331 C 1025.2706,793.71517 1038.983,757.79429 1047.5059,731.28537 C 1056.0287,704.77645 1063.3068,654.18583 1063.3068,654.18583"
2293 id="path8055"
2294 sodipodi:nodetypes="czzzzzzczczczczzzc" />
2295 <path
2296 style="opacity:0.25;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8818);enable-background:accumulate"
2297 d="M 770.74639,609.17881 L 719.8347,706.75955 L 639.93163,817.77531 L 674.57987,889.19309 L 717.00628,968.38906 L 789.13117,923.13422 L 803.27331,730.80117 L 824.48651,592.20825 L 810.34437,502.05213 L 770.74639,609.17881 z"
2298 id="path8057"
2299 clip-path="url(#clipPath8622)"
2300 sodipodi:nodetypes="cccccccccc"
2301 transform="translate(276,136)" />
2302 <path
2303 style="opacity:1;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8810);enable-background:accumulate"
2304 d="M 295,846.19519 L 301.64488,777.27234 C 301.64488,777.27234 391.96439,866.27691 464,900.19519 C 536.03561,934.11347 772,962.19519 772,962.19519 L 926,936.19519 L 890,1098.1952 L 604,1124.1952 L 306,1035.1952 L 295,846.19519 z"
2305 id="path8059"
2306 clip-path="url(#clipPath8906)"
2307 sodipodi:nodetypes="cczcccccc"
2308 transform="translate(276,136)" />
2309 <path
2310 transform="translate(450.03125,73.843964)"
2311 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter3587);enable-background:new"
2312 d="M 405.79629,845.99023 L 480.74961,911.04406 L 483.24924,927.92446 L 502.6526,938.08337 L 509.14464,961.13446 L 540.85369,952.76336 L 555.70293,1000.8466 C 567.95945,1013.5745 645.49637,887.7369 611.56436,1039.0304 L 550.75318,1055.2939 L 461.55026,960.60104 L 398.72523,906.80141 L 405.79629,845.99023 z"
2313 id="path8061"
2314 sodipodi:nodetypes="cccccccccccc"
2315 clip-path="url(#clipPath3602)" />
2316 <path
2317 style="opacity:1;fill:#121212;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate"
2318 d="M 1159.317,918.349 C 1213.6027,916.92043 1285.352,903.29701 1329.317,891.56328 C 1373.3697,879.80614 1455.2033,855.21604 1504.674,833.70614 C 1554.0133,812.25342 1618.2778,774.42454 1658.9599,741.56329 C 1699.468,708.8426 1711.3498,685.74348 1719.6741,707.99186 C 1728.0432,730.35965 1703.2672,764.31748 1681.817,789.06329 C 1660.2128,813.98669 1629.0856,841.76862 1582.8883,878.349 C 1536.691,914.92938 1426.8058,979.93363 1370.0312,1006.9204 C 1312.9652,1034.0458 1241.8279,1065.1589 1197.8884,1079.4205 C 1153.9489,1093.6821 1066.4598,1110.4919 1066.4598,1110.4919 L 1159.317,918.349 z"
2319 id="path8063"
2320 sodipodi:nodetypes="czzzzzzzzcc" />
2321 <path
2322 transform="translate(450.03125,73.843964)"
2323 style="opacity:0.5;fill:url(#linearGradient3666);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3779);enable-background:accumulate"
2324 d="M 1241.5965,652.95007 C 1241.5965,652.95007 1176.875,707.28713 1095.9326,751.94501 C 1013.9082,797.19985 811.67556,845.28311 811.67556,845.28311 C 811.67556,845.28311 796.57419,866.33507 856.93045,873.56739 C 917.28671,880.79971 1081.0124,820.2667 1135.5306,777.40085 C 1190.0488,734.535 1255.7387,665.67799 1255.7387,665.67799 L 1241.5965,652.95007 z"
2325 id="path8065"
2326 sodipodi:nodetypes="czczzcc"
2327 clip-path="url(#clipPath3992)" />
2328 <g
2329 transform="translate(450.03125,73.843964)"
2330 style="opacity:1;display:inline;enable-background:new"
2331 id="g8067"
2332 clip-path="url(#clipPath3986)">
2333 <g
2334 transform="translate(-174.03125,62.156036)"
2335 style="filter:url(#filter3677)"
2336 id="g8069">
2337 <g
2338 style="filter:url(#filter3785)"
2339 id="g8071">
2340 <path
2341 sodipodi:nodetypes="czzccccc"
2342 id="path8073"
2343 d="M 1094.2857,725.93361 C 1094.2857,725.93361 1093.9896,752.09452 1098.9285,763.79076 C 1103.8674,775.487 1118.9666,790.27741 1127.5,795.21933 C 1136.0334,800.16125 1146.4286,803.79075 1146.4286,803.79075 L 1264.2857,688.79075 L 1282.1429,613.07647 L 1185.7143,651.6479 L 1094.2857,725.93361 z"
2344 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2345 transform="translate(174.03125,-62.156036)" />
2346 <rect
2347 y="486.14224"
2348 x="1197.8389"
2349 height="309.71277"
2350 width="333.75412"
2351 id="rect8075"
2352 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2353 </g>
2354 </g>
2355 <g
2356 transform="translate(-174.03125,62.156036)"
2357 style="opacity:0.18000004;display:inline;enable-background:new"
2358 id="g8077">
2359 <g
2360 style="filter:url(#filter3785)"
2361 id="g8079">
2362 <path
2363 sodipodi:nodetypes="czzccccc"
2364 id="path8081"
2365 d="M 1094.2857,725.93361 C 1094.2857,725.93361 1093.9896,752.09452 1098.9285,763.79076 C 1103.8674,775.487 1118.9666,790.27741 1127.5,795.21933 C 1136.0334,800.16125 1146.4286,803.79075 1146.4286,803.79075 L 1264.2857,688.79075 L 1282.1429,613.07647 L 1185.7143,651.6479 L 1094.2857,725.93361 z"
2366 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2367 transform="translate(174.03125,-62.156036)" />
2368 <rect
2369 y="486.14224"
2370 x="1197.8389"
2371 height="309.71277"
2372 width="333.75412"
2373 id="rect8083"
2374 style="opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2375 </g>
2376 </g>
2377 </g>
2378 <path
2379 transform="translate(450.03125,73.843964)"
2380 style="opacity:0.83300003;fill:#050505;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:15;stroke-linecap:butt;stroke-linejoin:miter;marker:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter8225);enable-background:accumulate"
2381 d="M 1264.1875,605 C 1259.6964,605.73268 1256.0305,608.45509 1252.25,611.40625 C 1242.1687,619.27601 1224.0805,645.83149 1204.2188,661.875 C 1164.3514,694.07816 1100.2228,731.85201 1051.6562,752.96875 C 1003.0422,774.10613 921.11498,798.78676 877.34375,810.46875 C 833.94554,822.05121 762.29972,835.59982 709.09375,837 L 704.53125,837.125 L 702.53125,841.25 L 609.6875,1033.375 L 603.1875,1046.8438 L 617.84375,1044 C 617.84375,1044 705.11343,1027.3486 750.1875,1012.7188 C 794.9127,998.20213 865.97836,967.05197 923.21875,939.84375 C 980.82199,912.46306 1090.1551,847.86412 1137.5,810.375 C 1183.8608,773.66518 1215.3049,745.65818 1237.4375,720.125 C 1248.3386,707.549 1260.1823,692.59356 1268.4688,677.375 C 1276.7552,662.15644 1287.6285,633.15692 1282.1562,618.53125 C 1280.9385,615.27651 1279.6048,612.46995 1277.5625,610.03125 C 1275.5202,607.59255 1269.0878,608.45926 1269,605 C 1268.7902,596.73518 1265.6845,604.75577 1264.1875,605 z M 1266.3438,620.21875 C 1266.7586,620.80449 1267.3749,621.77641 1268.125,623.78125 C 1271.0218,631.52338 1266.6843,655.68 1259.3125,669.21875 C 1251.9407,682.7575 1236.6741,698.14269 1226.125,710.3125 C 1205.0496,734.62606 1174.2213,762.17406 1128.1875,798.625 C 1083.1379,834.29659 972.72717,899.71959 916.78125,926.3125 C 859.88952,953.35499 788.68509,984.4309 745.53125,998.4375 C 709.16634,1010.2406 649.68654,1022.2713 629.8125,1026.2188 L 714.09375,851.75 C 768.80066,849.7007 837.88634,836.53365 881.21875,824.96875 C 925.55297,813.1365 1007.2974,788.63242 1057.625,766.75 C 1107.737,744.96129 1170.1594,705.58184 1211.6562,672.0625 C 1232.3026,655.38529 1253.4011,629.51662 1261.4688,623.21875 C 1263.9058,621.31633 1265.5494,620.58295 1266.3438,620.21875 z"
2382 id="path8085"
2383 clip-path="url(#clipPath3722)"
2384 sodipodi:nodetypes="cssssccccccssssssssccssssssccssssc" />
2385 <g
2386 style="opacity:1;display:inline;enable-background:new"
2387 id="g8087"
2388 mask="url(#mask7704)"
2389 transform="matrix(0.9934486,0.1142802,-0.1142802,0.9934486,-9.24324,588.09054)"
2390 inkscape:transform-center-x="-185.09603"
2391 inkscape:transform-center-y="-12.859654">
2392 <path
2393 transform="translate(8.0045714e-2,-3.125e-2)"
2394 style="fill:#bcb786;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2395 d="M 1111.4062,-285.9375 L 1107.4688,-284.0625 C 1107.4283,-284.05228 1107.3692,-284.04201 1107.3438,-284.03125 C 1106.925,-283.8184 1107.1791,-283.93067 1106.6875,-283.71875 C 1106.2014,-283.50919 1104.9499,-283.13456 1102.5938,-282.25 C 1099.2626,-280.99942 1096.7895,-280.10016 1095.5938,-279.1875 C 1094.0576,-279.16623 1091.8733,-278.95419 1089.9375,-278.46875 C 1086.956,-277.72108 1085.0823,-277.29474 1083.1875,-276.875 C 1081.2927,-276.45527 1081.512,-276.23281 1080.3125,-276 C 1079.0159,-275.74833 1078.5911,-276.00899 1074.875,-275.21875 C 1071.3851,-274.4766 1065.9802,-273.28768 1064.7188,-272.53125 C 1063.1348,-272.71203 1060.8513,-272.85303 1058.875,-272.5625 C 1055.8346,-272.11554 1053.9588,-271.88974 1052.0312,-271.65625 C 1051.3758,-271.57687 1050.9902,-271.45547 1050.6875,-271.375 C 1050.2613,-271.24334 1050.0017,-271.11498 1049.3125,-271.03125 C 1048.0009,-270.87188 1047.5503,-271.18808 1043.7812,-270.75 C 1040.2273,-270.33691 1034.7758,-269.47718 1033.5312,-268.8125 C 1031.9322,-269.10979 1029.6735,-269.34669 1027.6875,-269.15625 C 1024.6287,-268.86293 1022.7155,-268.67226 1020.7812,-268.5 C 1018.847,-268.32773 1019.0926,-268.07763 1017.875,-267.96875 C 1016.5588,-267.85105 1016.1152,-268.13238 1012.3438,-267.71875 C 1008.8017,-267.3303 1003.3359,-266.50948 1002.0625,-265.84375 C 1000.4636,-266.13844 998.1753,-266.35076 996.1875,-266.15625 C 993.12921,-265.857 991.2463,-265.67601 989.3125,-265.5 C 988.65501,-265.44015 988.27245,-265.32144 987.96875,-265.25 C 987.54105,-265.13104 987.28525,-265.03193 986.59375,-264.96875 C 985.27775,-264.84849 984.834,-265.16363 981.0625,-264.75 C 977.50631,-264.35998 972.0569,-263.51084 970.8125,-262.84375 C 969.21381,-263.13793 966.95265,-263.36747 964.96875,-263.15625 C 961.91305,-262.83092 959.9947,-262.63001 958.0625,-262.4375 C 956.13031,-262.24499 956.37275,-261.99662 955.15625,-261.875 C 953.84137,-261.74353 953.3932,-262.03954 949.625,-261.59375 C 946.08611,-261.17509 940.6473,-260.30158 939.375,-259.625 C 937.77741,-259.90604 935.51505,-260.04543 933.53125,-259.8125 C 930.47927,-259.45413 928.58625,-259.24464 926.65625,-259.03125 C 926.00007,-258.95869 925.6156,-258.85856 925.3125,-258.78125 C 924.88571,-258.65402 924.6276,-258.51405 923.9375,-258.4375 C 922.62411,-258.29181 922.17015,-258.61152 918.40625,-258.125 C 914.85737,-257.66624 909.4276,-256.70598 908.1875,-256 C 906.59441,-256.24424 904.3537,-256.38135 902.375,-256.125 C 899.32741,-255.73018 897.4243,-255.47655 895.5,-255.21875 C 893.57571,-254.96096 893.7739,-254.72522 892.5625,-254.5625 C 891.25301,-254.3866 890.8153,-254.66688 887.0625,-254.09375 C 883.53821,-253.55551 878.1393,-252.39458 876.875,-251.65625 C 875.28751,-251.85979 873.0295,-251.91098 871.0625,-251.5625 C 868.03631,-251.02638 866.1636,-250.70081 864.25,-250.375 C 863.59941,-250.26423 863.2363,-250.10406 862.9375,-250 C 862.51681,-249.83512 862.27405,-249.6687 861.59375,-249.53125 C 860.29905,-249.26966 859.86665,-249.53745 856.15625,-248.71875 C 852.65777,-247.9468 847.31035,-246.33582 846.09375,-245.5 C 844.53085,-245.57745 842.33625,-245.41472 840.40625,-244.90625 C 837.43387,-244.12312 835.58855,-243.67416 833.71875,-243.15625 C 831.84875,-242.63835 832.0521,-242.38897 830.875,-242.0625 C 829.60251,-241.7096 829.17795,-241.95541 825.53125,-240.875 C 822.10657,-239.86037 816.88185,-237.94183 815.65625,-237.03125 C 814.11747,-237.01851 811.93645,-236.75903 810.03125,-236.15625 C 807.10027,-235.22891 805.2809,-234.69783 803.4375,-234.09375 C 802.81071,-233.88837 802.44585,-233.70117 802.15625,-233.5625 C 801.74867,-233.34889 801.50295,-233.15375 800.84375,-232.9375 C 799.58925,-232.52596 799.1576,-232.74846 795.5625,-231.5 C 792.17261,-230.32283 786.96755,-228.2863 785.78125,-227.34375 C 784.25737,-227.28408 782.1312,-226.94888 780.25,-226.28125 C 777.35261,-225.25296 775.55095,-224.60577 773.71875,-223.96875 C 771.88655,-223.33174 772.0909,-223.12021 770.9375,-222.71875 C 769.69071,-222.28479 769.27395,-222.51903 765.71875,-221.15625 C 762.38005,-219.87645 757.23165,-217.6737 756.03125,-216.6875 C 754.52407,-216.57981 752.39555,-216.1887 750.53125,-215.46875 C 747.66307,-214.36115 745.90735,-213.68719 744.09375,-213 C 743.47705,-212.76637 743.0973,-212.55797 742.8125,-212.40625 C 742.81251,-212.40625 742.8125,-212.37673 742.8125,-212.375 L 734.8125,-209.1875 L 722.3366,-205.69561 L 730.26626,-186.41789 C 729.67463,-184.44432 742.8125,-191.15625 742.8125,-191.15625 C 743.03891,-191.30093 743.26145,-191.42886 743.53125,-191.53125 C 744.61177,-191.94123 745.70285,-191.74702 749.53125,-193.21875 C 753.35977,-194.69049 754.7553,-195.22373 755.4375,-195.625 C 756.11711,-196.02478 757.04925,-196.50437 757.65625,-197.15625 C 759.48317,-197.294 761.22705,-197.64948 762.59375,-198.15625 C 765.56175,-199.25677 767.4691,-199.96244 769.375,-200.625 C 771.28081,-201.28754 771.72915,-202.03987 772.78125,-202.40625 C 773.87287,-202.78636 774.97635,-202.57163 778.84375,-203.9375 C 782.71115,-205.30336 784.1269,-205.76458 784.8125,-206.15625 C 785.51361,-206.55677 786.5133,-207.08923 787.125,-207.75 C 789.09581,-207.80466 790.94195,-208.13463 792.40625,-208.625 C 795.40777,-209.63008 797.3324,-210.24671 799.25,-210.875 C 800.78861,-211.3791 801.42415,-211.92177 802.15625,-212.3125 C 802.38647,-212.44681 802.63215,-212.56623 802.90625,-212.65625 C 804.00457,-213.01673 805.0877,-212.73762 809,-213.96875 C 812.91231,-215.19988 814.366,-215.6417 815.0625,-216 C 815.75641,-216.35697 816.6926,-216.79261 817.3125,-217.40625 C 819.17771,-217.42891 820.94835,-217.67308 822.34375,-218.09375 C 825.37415,-219.00729 827.33615,-219.52385 829.28125,-220.0625 C 831.22637,-220.60114 831.70745,-221.32702 832.78125,-221.625 C 833.89527,-221.93415 835.00125,-221.61761 838.96875,-222.65625 C 842.93625,-223.69488 844.38625,-224.08898 845.09375,-224.40625 C 845.82855,-224.73584 846.90765,-225.15997 847.53125,-225.78125 C 849.52907,-225.66525 851.3887,-225.80134 852.875,-226.15625 C 855.95311,-226.89125 857.9584,-227.25719 859.9375,-227.65625 C 861.52541,-227.97643 862.1818,-228.4468 862.9375,-228.75 C 863.17501,-228.8568 863.4044,-228.94276 863.6875,-229 C 864.82091,-229.22919 865.99215,-228.79107 870.03125,-229.5 C 874.07067,-230.20893 875.5315,-230.42709 876.25,-230.6875 C 876.96581,-230.94694 877.95435,-231.25474 878.59375,-231.78125 C 880.51795,-231.54176 882.34165,-231.55672 883.78125,-231.78125 C 886.90767,-232.26887 888.9358,-232.48192 890.9375,-232.75 C 892.93921,-233.01807 893.42625,-233.69514 894.53125,-233.84375 C 895.67767,-233.99793 896.8071,-233.54218 900.875,-234.0625 C 904.94281,-234.58282 906.43525,-234.75823 907.15625,-235 C 907.89337,-235.24714 908.95435,-235.58623 909.59375,-236.125 C 911.64375,-235.78947 913.56745,-235.72704 915.09375,-235.90625 C 918.23595,-236.27521 920.27375,-236.46561 922.28125,-236.6875 C 923.89207,-236.86552 924.5459,-237.2957 925.3125,-237.53125 C 925.55341,-237.61677 925.80655,-237.68685 926.09375,-237.71875 C 927.24345,-237.84647 928.39505,-237.3721 932.46875,-237.84375 C 936.54245,-238.3154 938.0278,-238.45435 938.75,-238.6875 C 939.46941,-238.91977 940.45025,-239.16096 941.09375,-239.65625 C 943.03005,-239.32279 944.8638,-239.25201 946.3125,-239.40625 C 949.45851,-239.7412 951.49,-239.92484 953.5,-240.125 C 955.50991,-240.32514 955.98415,-240.95139 957.09375,-241.0625 C 958.24485,-241.17778 959.39025,-240.69744 963.46875,-241.125 C 967.54725,-241.55256 969.05765,-241.68709 969.78125,-241.90625 C 970.52047,-242.13011 971.57685,-242.4195 972.21875,-242.9375 C 974.27575,-242.53883 976.2206,-242.4441 977.75,-242.59375 C 980.89871,-242.90185 982.9258,-243.067 984.9375,-243.25 C 986.55151,-243.39682 987.20055,-243.81055 987.96875,-244.03125 C 988.21005,-244.11211 988.4623,-244.16116 988.75,-244.1875 C 989.90211,-244.29295 991.0429,-243.79475 995.125,-244.1875 C 999.20711,-244.58025 1000.7139,-244.71834 1001.4375,-244.9375 C 1002.1584,-245.15583 1003.1371,-245.3852 1003.7812,-245.875 C 1005.7193,-245.52501 1007.5501,-245.42062 1009,-245.5625 C 1012.1487,-245.8706 1014.1758,-246.03575 1016.1875,-246.21875 C 1018.1991,-246.40174 1018.7017,-247.05677 1019.8125,-247.15625 C 1020.9648,-247.25948 1022.1047,-246.77142 1026.1875,-247.15625 C 1030.2704,-247.54107 1031.7762,-247.65725 1032.5,-247.875 C 1033.2393,-248.09743 1034.2956,-248.38949 1034.9375,-248.90625 C 1036.9949,-248.50448 1038.9404,-248.40292 1040.4688,-248.5625 C 1043.6153,-248.89102 1045.6458,-249.0852 1047.6562,-249.28125 C 1049.2692,-249.43854 1049.9219,-249.91273 1050.6875,-250.15625 C 1050.9282,-250.24429 1051.1507,-250.27762 1051.4375,-250.3125 C 1052.5858,-250.4522 1053.7542,-249.97259 1057.8125,-250.5625 C 1061.8708,-251.15242 1063.3743,-251.33964 1064.0938,-251.59375 C 1064.8104,-251.84691 1065.7684,-252.15182 1066.4062,-252.6875 C 1068.3259,-252.47556 1070.1262,-252.53609 1071.5625,-252.78125 C 1074.6816,-253.31365 1076.6741,-253.70986 1078.6562,-254.09375 C 1080.6383,-254.47762 1081.1305,-255.1334 1082.2188,-255.375 C 1083.3475,-255.62566 1084.489,-255.25871 1088.4688,-256.25 C 1092.4483,-257.24127 1093.8983,-257.6693 1094.5938,-258.03125 C 1095.316,-258.40725 1096.3555,-258.90183 1096.9688,-259.5625 C 1098.9317,-259.57454 1100.7625,-259.85355 1102.1875,-260.40625 C 1105.1387,-261.55085 1107.0607,-262.27567 1108.875,-263.15625 C 1110.3307,-263.86277 1111.1941,-264.85828 1111.4062,-265.15625 C 1111.6185,-265.4542 1111.5051,-265.8848 1111.5312,-265.90625 C 1111.5742,-265.94148 1111.8716,-266.00028 1112.0312,-266.34375 C 1112.8902,-268.19082 1114.3544,-271.97139 1114.4688,-272.65625 C 1114.5825,-273.33839 1114.6368,-274.00902 1114.6875,-274.40625 C 1114.7169,-274.63575 1114.5404,-275.28515 1114.5625,-275.34375 C 1114.5934,-275.42579 1114.8508,-275.59432 1114.9062,-275.84375 C 1115.1725,-277.04206 1114.9953,-278.05111 1114.7812,-279.46875 C 1114.5673,-280.88638 1113.8096,-284.08338 1113.1562,-284.9375 C 1112.4973,-285.79922 1111.9314,-285.94801 1111.4062,-285.9375 z"
2396 id="path8089"
2397 sodipodi:nodetypes="ccssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssscccccssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssssssssc" />
2398 <g
2399 clip-path="url(#clipPath7421)"
2400 id="g8091">
2401 <path
2402 id="path8093"
2403 d="M 1107.409,-284.04961 C 1106.9903,-283.83678 1107.2534,-283.95572 1106.7618,-283.7438 C 1106.2757,-283.53426 1105.0384,-283.16941 1102.6822,-282.28485 C 1099.3511,-281.03428 1096.852,-280.13141 1095.6563,-279.21875 C 1094.1202,-279.19749 1091.9358,-278.98544 1090,-278.5 C 1087.0185,-277.75234 1085.1448,-277.32599 1083.25,-276.90625 C 1081.3552,-276.48653 1081.5745,-276.26406 1080.375,-276.03125 C 1079.0784,-275.77959 1078.6536,-276.04024 1074.9375,-275.25 C 1071.4476,-274.50786 1066.0427,-273.31893 1064.7813,-272.5625 C 1063.1974,-272.74329 1060.9138,-272.88428 1058.9375,-272.59375 C 1055.8971,-272.1468 1054.0213,-271.92099 1052.0938,-271.6875 C 1051.4384,-271.60813 1051.0527,-271.48672 1050.75,-271.40625 C 1050.3238,-271.2746 1050.0642,-271.14623 1049.375,-271.0625 C 1048.0634,-270.90314 1047.6128,-271.21933 1043.8438,-270.78125 C 1040.2899,-270.36817 1034.8384,-269.50843 1033.5938,-268.84375 C 1031.9948,-269.14105 1029.736,-269.37794 1027.75,-269.1875 C 1024.6912,-268.89419 1022.778,-268.70351 1020.8438,-268.53125 C 1018.9095,-268.35899 1019.1551,-268.10888 1017.9375,-268 C 1016.6213,-267.88231 1016.1777,-268.16363 1012.4063,-267.75 C 1008.8644,-267.36156 1003.3984,-266.54073 1002.125,-265.875 C 1000.5261,-266.1697 998.23783,-266.38201 996.25,-266.1875 C 993.19176,-265.88826 991.30887,-265.70726 989.375,-265.53125 C 988.71751,-265.47141 988.33496,-265.35269 988.03125,-265.28125 C 987.6036,-265.1623 987.34774,-265.06318 986.65625,-265 C 985.34029,-264.87975 984.89649,-265.19488 981.125,-264.78125 C 977.56886,-264.39124 972.11946,-263.54209 970.875,-262.875 C 969.27637,-263.16919 967.01516,-263.39872 965.03125,-263.1875 C 961.97565,-262.86218 960.05721,-262.66126 958.125,-262.46875 C 956.19279,-262.27625 956.43513,-262.02787 955.21875,-261.90625 C 953.90387,-261.77479 953.45577,-262.07079 949.6875,-261.625 C 946.14863,-261.20635 940.70982,-260.33283 939.4375,-259.65625 C 937.83995,-259.9373 935.57754,-260.07668 933.59375,-259.84375 C 930.54177,-259.48539 928.64867,-259.27589 926.71875,-259.0625 C 926.06255,-258.98995 925.67809,-258.88981 925.375,-258.8125 C 924.94823,-258.68528 924.69009,-258.5453 924,-258.46875 C 922.68667,-258.32307 922.23254,-258.64277 918.46875,-258.15625 C 914.91986,-257.6975 909.49012,-256.73723 908.25,-256.03125 C 906.65695,-256.27549 904.41619,-256.4126 902.4375,-256.15625 C 899.38991,-255.76144 897.48679,-255.5078 895.5625,-255.25 C 893.63822,-254.99221 893.83639,-254.75647 892.625,-254.59375 C 891.31554,-254.41785 890.87781,-254.69813 887.125,-254.125 C 883.60071,-253.58676 878.20185,-252.42583 876.9375,-251.6875 C 875.35,-251.89104 873.092,-251.94223 871.125,-251.59375 C 868.09883,-251.05763 866.22611,-250.73206 864.3125,-250.40625 C 863.66189,-250.29548 863.29879,-250.13531 863,-250.03125 C 862.57933,-249.86637 862.33655,-249.69995 861.65625,-249.5625 C 860.3616,-249.30091 859.92913,-249.5687 856.21875,-248.75 C 852.72022,-247.97805 847.3728,-246.36707 846.15625,-245.53125 C 844.59347,-245.6087 842.39867,-245.44597 840.46875,-244.9375 C 837.49631,-244.15437 835.65114,-243.70541 833.78125,-243.1875 C 831.91137,-242.6696 832.11465,-242.42022 830.9375,-242.09375 C 829.66504,-241.74085 829.24047,-241.98666 825.59375,-240.90625 C 822.16905,-239.89162 816.94431,-237.97308 815.71875,-237.0625 C 814.17992,-237.04976 811.99892,-236.79028 810.09375,-236.1875 C 807.16269,-235.26016 805.34344,-234.72908 803.5,-234.125 C 802.87324,-233.91962 802.50827,-233.73242 802.21875,-233.59375 C 801.81112,-233.38014 801.56541,-233.185 800.90625,-232.96875 C 799.65179,-232.55721 799.22014,-232.77971 795.625,-231.53125 C 792.23515,-230.35408 787.03002,-228.31755 785.84375,-227.375 C 784.31986,-227.31533 782.1937,-226.98013 780.3125,-226.3125 C 777.41511,-225.28421 775.61342,-224.63702 773.78125,-224 C 771.94908,-223.36299 772.1534,-223.15146 771,-222.75 C 769.75322,-222.31604 769.33639,-222.55028 765.78125,-221.1875 C 762.44258,-219.9077 757.2941,-217.70495 756.09375,-216.71875 C 754.58657,-216.61106 752.45806,-216.21995 750.59375,-215.5 C 747.72557,-214.3924 745.96995,-213.71844 744.15625,-213.03125 C 743.53959,-212.79762 743.15984,-212.58922 742.875,-212.4375 C 742.875,-212.4375 742.875,-211.34375 742.875,-211.34375 C 742.98678,-211.56611 743.26099,-212.16118 743.78125,-212.4375 C 744.47922,-212.80822 748.59488,-214.43087 750.59375,-215.15625 C 752.25061,-215.7575 754.74764,-216.48493 756.5625,-216.46875 C 756.86412,-216.46606 757.15012,-216.41785 757.40625,-216.375 C 759.24874,-216.06675 764.875,-214.8125 764.875,-214.8125 C 764.87499,-214.8125 758.64151,-216.45925 757.84375,-216.65625 C 757.65296,-216.70336 757.30803,-216.72497 756.875,-216.71875 C 758.02046,-217.58846 761.636,-219.11226 764.21875,-220.15625 C 767.05697,-221.30352 767.33556,-221.40807 769.28125,-221.8125 C 771.28955,-222.22994 772.4375,-222.3125 772.4375,-222.3125 C 772.4375,-222.31249 772.35514,-222.91364 773.40625,-223.4375 C 774.11135,-223.78891 778.29327,-225.3299 780.3125,-226 C 782.2644,-226.64773 785.3699,-227.3585 787.1875,-227 C 789.05073,-226.6325 794.71875,-225.1875 794.71875,-225.1875 C 794.71876,-225.1875 788.43175,-227.05861 787.625,-227.28125 C 787.43208,-227.3345 787.09416,-227.36729 786.65625,-227.375 C 787.81459,-228.20788 791.45069,-229.57032 794.0625,-230.53125 C 796.93266,-231.58726 797.22984,-231.69305 799.1875,-232.0625 C 801.04099,-232.41229 802.04634,-232.48798 802.21875,-232.5 C 802.33235,-232.71724 802.5962,-233.31002 803.125,-233.5625 C 803.83444,-233.90124 808.05107,-235.27525 810.09375,-235.875 C 811.78692,-236.37211 814.33452,-236.91177 816.1875,-236.78125 C 816.49545,-236.75957 816.80099,-236.68399 817.0625,-236.625 C 818.94368,-236.20068 824.65625,-234.59375 824.65625,-234.59375 C 824.65626,-234.59375 818.31451,-236.659 817.5,-236.90625 C 817.30521,-236.96539 816.94212,-237.01019 816.5,-237.03125 C 817.66949,-237.8288 821.36302,-239.08747 824,-239.96875 C 826.89781,-240.93722 827.23301,-240.97207 829.21875,-241.25 C 831.2684,-241.53689 832.40625,-241.5625 832.40625,-241.5625 C 832.40623,-241.5625 832.3335,-242.16947 833.40625,-242.625 C 834.12585,-242.93057 838.39723,-244.12575 840.46875,-244.625 C 842.47119,-245.10758 845.66724,-245.55329 847.53125,-245.03125 C 849.44203,-244.4961 855.25,-242.53125 855.25,-242.53125 C 855.25,-242.53125 848.82734,-244.95476 848,-245.25 C 847.80216,-245.32061 847.41784,-245.39039 846.96875,-245.4375 C 848.15665,-246.16615 851.88402,-247.21158 854.5625,-247.9375 C 857.50592,-248.73525 857.85458,-248.70833 859.875,-248.84375 C 861.78789,-248.97198 862.82205,-248.91484 863,-248.90625 C 863.11728,-249.10991 863.39176,-249.68573 863.9375,-249.875 C 864.66969,-250.12894 869.01602,-250.92289 871.125,-251.25 C 872.87313,-251.52111 875.52588,-251.7347 877.4375,-251.34375 C 877.75516,-251.27879 878.04272,-251.15824 878.3125,-251.0625 C 880.25324,-250.37377 886.15625,-247.96875 886.15625,-247.96875 C 886.15626,-247.96875 879.62154,-250.91952 878.78125,-251.28125 C 878.58028,-251.36776 878.20612,-251.44804 877.75,-251.53125 C 878.9565,-252.16443 882.77956,-252.92685 885.5,-253.4375 C 888.48953,-253.99869 888.80023,-253.96704 890.84375,-253.96875 C 892.95301,-253.97052 894.15625,-253.84375 894.15625,-253.84375 C 894.15625,-253.84374 894.08354,-254.47494 895.1875,-254.78125 C 895.92802,-254.98672 900.31362,-255.61512 902.4375,-255.84375 C 904.49052,-256.06474 907.75613,-256.09597 909.65625,-255.375 C 911.60404,-254.63593 917.5,-252 917.5,-252 C 917.50002,-252 910.93712,-255.17897 910.09375,-255.5625 C 909.89207,-255.65423 909.55154,-255.74871 909.09375,-255.84375 C 910.30467,-256.44563 914.07817,-257.09259 916.8125,-257.5 C 919.8173,-257.94772 920.13801,-257.9517 922.1875,-257.90625 C 924.12795,-257.86323 925.19449,-257.71202 925.375,-257.6875 C 925.49392,-257.88066 925.7589,-258.45333 926.3125,-258.59375 C 927.05521,-258.78213 931.46679,-259.32803 933.59375,-259.53125 C 935.35678,-259.69967 938.01384,-259.76554 939.9375,-259.28125 C 940.25718,-259.20077 940.54101,-259.07766 940.8125,-258.96875 C 942.76543,-258.18526 948.71875,-255.5 948.71875,-255.5 C 948.71873,-255.5 942.12684,-258.75348 941.28125,-259.15625 C 941.07903,-259.25257 940.70899,-259.36328 940.25,-259.46875 C 941.46414,-260.04302 945.29366,-260.59094 948.03125,-260.96875 C 951.03963,-261.38395 951.35432,-261.41138 953.40625,-261.34375 C 955.52423,-261.27394 956.71875,-261.09375 956.71875,-261.09375 C 956.71873,-261.09375 956.6415,-261.73116 957.75,-262 C 958.49362,-262.18035 962.90176,-262.66355 965.03125,-262.84375 C 967.08972,-263.01792 970.37449,-262.96807 972.28125,-262.1875 C 974.23584,-261.38734 980.15625,-258.65625 980.15625,-258.65625 C 980.15623,-258.65625 973.59632,-261.96501 972.75,-262.375 C 972.54763,-262.47305 972.17814,-262.5781 971.71875,-262.6875 C 972.93392,-263.2514 976.72883,-263.8018 979.46875,-264.15625 C 982.47966,-264.54577 982.79006,-264.5539 984.84375,-264.46875 C 986.78814,-264.38815 987.85038,-264.21551 988.03125,-264.1875 C 988.15041,-264.37836 988.41402,-264.93281 988.96875,-265.0625 C 989.71301,-265.2365 994.11868,-265.71297 996.25,-265.875 C 998.01662,-266.00927 1000.6997,-266.00071 1002.625,-265.5 C 1002.945,-265.41679 1003.2283,-265.29873 1003.5,-265.1875 C 1005.4546,-264.38734 1011.4063,-261.625 1011.4063,-261.625 C 1011.4062,-261.625 1004.8151,-264.96501 1003.9688,-265.375 C 1003.7664,-265.47305 1003.3969,-265.57811 1002.9375,-265.6875 C 1004.1526,-266.2514 1007.9788,-266.77056 1010.7188,-267.125 C 1013.7297,-267.51453 1014.0713,-267.5539 1016.125,-267.46875 C 1018.2447,-267.38087 1019.4375,-267.15625 1019.4375,-267.15625 C 1019.4375,-267.15625 1019.3591,-267.80527 1020.4688,-268.0625 C 1021.2131,-268.23506 1025.6183,-268.68586 1027.75,-268.84375 C 1029.8106,-268.99635 1033.0929,-268.94052 1035,-268.15625 C 1036.955,-267.3523 1042.875,-264.65625 1042.875,-264.65625 C 1042.875,-264.65625 1036.3152,-267.93212 1035.4688,-268.34375 C 1035.2663,-268.44219 1034.897,-268.54597 1034.4375,-268.65625 C 1035.6529,-269.21779 1039.4494,-269.78403 1042.1875,-270.15625 C 1045.1965,-270.5653 1045.5102,-270.57183 1047.5625,-270.5 C 1049.5056,-270.43201 1050.5697,-270.33515 1050.75,-270.3125 C 1050.8688,-270.5069 1051.1346,-271.04131 1051.6875,-271.1875 C 1052.4293,-271.38362 1056.8186,-272.01628 1058.9375,-272.28125 C 1060.6939,-272.50086 1063.3428,-272.61356 1065.25,-272.25 C 1065.5669,-272.18959 1065.8558,-272.06062 1066.125,-271.96875 C 1068.0612,-271.30783 1073.9688,-269.03125 1073.9688,-269.03125 C 1073.9687,-269.03125 1067.4321,-271.8378 1066.5938,-272.1875 C 1066.3933,-272.27113 1066.0176,-272.36083 1065.5625,-272.4375 C 1066.7662,-273.08796 1070.5816,-273.80945 1073.2813,-274.4375 C 1076.248,-275.1277 1076.5702,-275.19257 1078.5938,-275.3125 C 1080.6824,-275.4363 1081.875,-275.34375 1081.875,-275.34375 C 1081.875,-275.34374 1081.788,-275.9758 1082.875,-276.375 C 1083.6042,-276.6428 1087.9222,-277.71297 1090,-278.1875 C 1092.0085,-278.64619 1095.1679,-279.2168 1097,-278.8125 C 1098.8781,-278.39804 1110.5782,-275.79687 1110.5782,-275.79687 C 1110.5782,-275.79687 1098.2507,-278.81953 1097.4375,-279.0625 C 1097.243,-279.12062 1096.8789,-279.16876 1096.4375,-279.1875 C 1097.6051,-279.99119 1099.9517,-280.8748 1102.5469,-281.89062 C 1104.2283,-282.5488 1103.4706,-282.26721 1105.3228,-282.89422 C 1107.0764,-283.48788 1107.8082,-283.90493 1107.9532,-284.00721 C 1108.2993,-284.21372 1107.5972,-284.12909 1107.409,-284.04961 z"
2404 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7001);enable-background:new"
2405 sodipodi:nodetypes="czscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssccsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscc" />
2406 <path
2407 id="path8095"
2408 d="M 1082.625,-275.125 C 1084.498,-274.73152 1087.1211,-273.97945 1088.6563,-273.15625 C 1090.1915,-272.33306 1091.4785,-272.10025 1094.0313,-270.65625 C 1096.5579,-269.22699 1098.8271,-268.64929 1101,-268.125 C 1103.3476,-267.55858 1106.4354,-267.40977 1109.8438,-266.9375 C 1108.7549,-267.77725 1103.2364,-268.10995 1101.4375,-268.5 C 1099.6386,-268.89006 1097.5434,-269.51616 1094.8438,-270.8125 C 1092.1441,-272.10884 1091.3494,-272.61146 1089.0313,-273.5 C 1086.7131,-274.38854 1085.0269,-274.88314 1082.625,-275.125 z"
2409 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6949);enable-background:new" />
2410 <path
2411 id="path8097"
2412 d="M 1051.4688,-270 C 1053.3741,-269.42241 1055.9969,-268.38428 1057.5625,-267.40625 C 1059.1281,-266.42823 1060.4427,-266.04644 1063.0625,-264.28125 C 1065.6555,-262.53409 1068.0484,-261.57198 1070.3125,-260.6875 C 1072.7586,-259.73193 1075.9951,-259.03037 1079.7188,-257.625 C 1078.5292,-258.76284 1072.6557,-260.31175 1070.7813,-261 C 1068.9068,-261.68825 1066.6995,-262.5662 1063.9063,-264.28125 C 1061.113,-265.99629 1060.3327,-266.56515 1057.9688,-267.6875 C 1055.6047,-268.80984 1053.9121,-269.52205 1051.4688,-270 z"
2413 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6961);enable-background:new" />
2414 <path
2415 id="path8099"
2416 d="M 1020.2188,-266.84375 C 1022.1307,-266.20564 1024.8,-265.08839 1026.375,-264.03125 C 1027.9501,-262.9741 1029.2706,-262.52258 1031.9063,-260.625 C 1034.5149,-258.74679 1036.9347,-257.59497 1039.2188,-256.5625 C 1041.6865,-255.44705 1044.9833,-254.3892 1048.75,-252.71875 C 1047.5467,-253.94128 1041.5472,-256.03298 1039.6563,-256.84375 C 1037.7653,-257.65452 1035.5914,-258.73754 1032.7813,-260.59375 C 1029.9711,-262.44995 1029.1595,-263.07068 1026.7813,-264.3125 C 1024.403,-265.5543 1022.6706,-266.28819 1020.2188,-266.84375 z"
2417 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6957);enable-background:new" />
2418 <path
2419 id="path8101"
2420 d="M 1110.1719,-266.89063 C 1110.3227,-266.84207 1110.8599,-266.25963 1110.2813,-265.40625 C 1109.4712,-264.21166 1104.5764,-262.08196 1101.7188,-261.28125 C 1098.8739,-260.48413 1095.4287,-260.30351 1091.1563,-261.65625 C 1086.8547,-263.0182 1085.6866,-264.12497 1080.5,-265.96875 C 1085.164,-263.85358 1086.6953,-262.01642 1090.625,-260.625 C 1092.2457,-260.05113 1093.9921,-259.6854 1095.6875,-259.59375 C 1095.2424,-259.26812 1094.1572,-258.61045 1092.125,-258 C 1089.3295,-257.16031 1085.4759,-256.46622 1083.875,-256.375 C 1082.3604,-256.28868 1080.733,-256.88749 1080.4375,-257 C 1080.6042,-256.89692 1080.8107,-256.62266 1080.1875,-255.96875 C 1079.2882,-255.02512 1074.0401,-254.04575 1071.0625,-253.71875 C 1068.0982,-253.3932 1064.5409,-253.73471 1060.1563,-255.625 C 1056.1783,-257.33997 1054.8173,-258.54036 1050.75,-260.375 C 1050.75,-260.375 1050.75,-260.21875 1050.75,-260.21875 C 1054.3931,-258.12346 1056.034,-256.33548 1059.625,-254.65625 C 1061.3552,-253.84716 1063.2167,-253.24749 1065.0313,-252.9375 C 1064.4964,-252.65074 1063.4735,-252.22599 1061.5938,-251.90625 C 1058.7248,-251.41829 1054.7848,-251.09011 1053.1563,-251.15625 C 1052.3056,-251.19079 1051.4277,-251.34062 1050.75,-251.5625 C 1050.0652,-251.77738 1049.5603,-252.00717 1049.4375,-252.0625 C 1049.6069,-251.95529 1049.8686,-251.65962 1049.2188,-251.03125 C 1048.3091,-250.15163 1042.9727,-249.69487 1039.9688,-249.5625 C 1036.9783,-249.43071 1033.3799,-250.01313 1028.9688,-252.125 C 1024.5276,-254.25126 1023.3273,-255.5266 1018.0625,-257.90625 C 1022.7968,-255.30921 1024.349,-253.27715 1028.4063,-251.1875 C 1030.0796,-250.32565 1031.8915,-249.69325 1033.6563,-249.25 C 1033.193,-249.01668 1032.0669,-248.56186 1029.9688,-248.3125 C 1027.0825,-247.96952 1023.1342,-247.81962 1021.5,-247.9375 C 1019.9538,-248.049 1018.2688,-248.79446 1017.9688,-248.9375 C 1018.1379,-248.81721 1018.3826,-248.52702 1017.75,-247.9375 C 1016.8372,-247.08677 1011.5059,-246.67538 1008.5,-246.5625 C 1005.5075,-246.45013 1001.9103,-247.05293 997.5,-249.15625 C 993.49875,-251.06448 992.11197,-252.29408 988.03125,-254.25 C 988.03122,-254.25 988.03125,-254.09375 988.03125,-254.09375 C 991.68631,-251.88983 993.32546,-250.0412 996.9375,-248.1875 C 998.67779,-247.29435 1000.5745,-246.65923 1002.4063,-246.21875 C 1001.8663,-245.97045 1000.8282,-245.60342 998.9375,-245.375 C 996.05182,-245.02642 992.07145,-244.85405 990.4375,-244.96875 C 989.58405,-245.02865 988.71119,-245.22666 988.03125,-245.46875 C 987.34415,-245.70405 986.8419,-245.94101 986.71875,-246 C 986.88873,-245.88773 987.18323,-245.57775 986.53125,-244.96875 C 985.6186,-244.11625 980.25592,-243.67538 977.25,-243.5625 C 974.25754,-243.45013 970.65654,-244.09055 966.25,-246.15625 C 961.81347,-248.23603 960.60312,-249.48796 955.34375,-251.8125 C 960.07313,-249.26501 961.63449,-247.2347 965.6875,-245.1875 C 967.35905,-244.34317 969.17304,-243.72107 970.9375,-243.28125 C 970.47427,-243.04703 969.3478,-242.59718 967.25,-242.34375 C 964.36431,-241.99517 960.4138,-241.77423 958.78125,-241.875 C 957.23669,-241.97032 955.58094,-242.70385 955.28125,-242.84375 C 955.45024,-242.72522 955.66317,-242.4399 955.03125,-241.84375 C 954.11939,-240.98347 948.7846,-240.5135 945.78125,-240.375 C 942.7913,-240.2371 939.2138,-240.82568 934.8125,-242.84375 C 930.81942,-244.67464 929.44739,-245.87295 925.375,-247.75 C 925.37498,-247.75 925.375,-247.59375 925.375,-247.59375 C 929.02261,-245.46048 930.64533,-243.65888 934.25,-241.875 C 935.98675,-241.01549 937.85727,-240.42486 939.6875,-240 C 939.14803,-239.7471 938.13687,-239.35871 936.25,-239.09375 C 933.37022,-238.68939 929.41187,-238.44813 927.78125,-238.53125 C 926.92953,-238.57466 926.05355,-238.7398 925.375,-238.96875 C 924.68931,-239.19076 924.1854,-239.41214 924.0625,-239.46875 C 924.23209,-239.35976 924.4944,-239.0591 923.84375,-238.4375 C 922.93296,-237.56736 917.59354,-237.04598 914.59375,-236.875 C 911.60742,-236.70479 908.01994,-237.19077 903.625,-239.15625 C 899.20011,-241.13513 898.01904,-242.38444 892.78125,-244.53125 C 897.49122,-242.14358 899.05142,-240.14252 903.09375,-238.1875 C 904.7609,-237.38119 906.55418,-236.79092 908.3125,-236.40625 C 907.85087,-236.15755 906.7155,-235.694 904.625,-235.375 C 901.7494,-234.93624 897.8446,-234.6419 896.21875,-234.6875 C 894.68052,-234.73062 892.98595,-235.43272 892.6875,-235.5625 C 892.85583,-235.44968 893.09807,-235.14875 892.46875,-234.53125 C 891.56063,-233.64015 886.2658,-233.003 883.28125,-232.71875 C 880.31007,-232.43577 876.70783,-232.89455 872.34375,-234.65625 C 868.38441,-236.25456 867.0146,-237.45112 863,-238.96875 C 863.00003,-238.96875 863,-238.8125 863,-238.8125 C 866.5959,-237.00115 868.23831,-235.23017 871.8125,-233.65625 C 873.53457,-232.8979 875.39998,-232.3673 877.21875,-232.03125 C 876.68266,-231.75217 875.65217,-231.34362 873.78125,-230.96875 C 870.92586,-230.39665 866.99183,-229.94936 865.375,-229.9375 C 864.53049,-229.93129 863.66892,-230.01844 863,-230.1875 C 862.32409,-230.34901 861.83991,-230.51673 861.71875,-230.5625 C 861.88597,-230.46848 862.14142,-230.17902 861.5,-229.5 C 860.60213,-228.54948 855.31352,-227.58292 852.375,-227.0625 C 849.44966,-226.54441 845.94285,-226.68826 841.65625,-228.09375 C 837.34045,-229.50882 836.18348,-230.62369 831.09375,-232.0625 C 835.6706,-230.31149 837.1823,-228.50244 841.125,-227.0625 C 842.75108,-226.46861 844.49385,-226.10685 846.21875,-225.90625 C 845.7659,-225.60923 844.66397,-225.02286 842.625,-224.4375 C 839.82028,-223.63233 835.98614,-222.86167 834.40625,-222.6875 C 832.9115,-222.5227 831.29002,-223.00431 831,-223.09375 C 831.16356,-223.00368 831.39278,-222.73382 830.78125,-222.03125 C 829.89878,-221.0174 824.73673,-219.6596 821.84375,-218.96875 C 818.96373,-218.28097 815.50815,-218.20873 811.28125,-219.40625 C 807.4464,-220.4927 806.10867,-221.47862 802.21875,-222.53125 C 802.21874,-222.53125 802.21875,-222.375 802.21875,-222.375 C 805.70293,-220.98015 807.28816,-219.4556 810.75,-218.34375 C 812.41793,-217.80803 814.20578,-217.55701 815.96875,-217.46875 C 815.44911,-217.11663 814.46836,-216.55423 812.65625,-215.9375 C 809.89059,-214.99625 806.06601,-214.00213 804.5,-213.78125 C 803.68206,-213.66586 802.8669,-213.65842 802.21875,-213.75 C 801.56379,-213.83321 801.08615,-213.96827 800.96875,-214 C 801.13079,-213.92536 801.40274,-213.65956 800.78125,-212.90625 C 799.91125,-211.85172 794.77162,-210.247 791.90625,-209.46875 C 789.05372,-208.69399 785.64713,-208.51055 781.46875,-209.5625 C 777.26192,-210.62163 776.11206,-211.60416 771.125,-212.71875 C 775.60954,-211.25929 777.09435,-209.58352 780.9375,-208.46875 C 782.52254,-208.00898 784.22429,-207.8305 785.90625,-207.78125 C 785.46468,-207.44449 784.39374,-206.75352 782.40625,-206 C 779.67232,-204.96351 775.95427,-203.83731 774.40625,-203.5625 C 772.94163,-203.30248 771.34667,-203.67904 771.0625,-203.75 C 771.22275,-203.67035 771.44294,-203.42902 770.84375,-202.6875 C 769.97909,-201.61744 764.92723,-199.86935 762.09375,-199 C 759.27295,-198.13453 755.88625,-197.84369 751.75,-198.78125 C 747.99741,-199.63186 746.70215,-200.49772 742.875,-201.375 C 742.875,-201.375 742.875,-201.21875 742.875,-201.21875 C 746.30296,-199.98096 747.86241,-198.58645 751.25,-197.6875 C 752.88216,-197.25436 754.61704,-197.10449 756.34375,-197.125 C 755.83482,-196.74083 754.867,-196.10318 753.09375,-195.375 C 750.38741,-194.26366 746.65742,-193.06719 745.125,-192.75 C 744.3246,-192.58431 743.51269,-192.53138 742.875,-192.59375 C 742.875,-192.59375 742.875,-192.07823 742.875,-191.67146 C 742.875,-191.40639 742.875,-191.1875 742.875,-191.1875 C 743.10145,-191.33218 743.32391,-191.46011 743.59375,-191.5625 C 744.67427,-191.97248 745.76536,-191.77827 749.59375,-193.25 C 753.42218,-194.72174 754.81787,-195.25498 755.5,-195.65625 C 756.1796,-196.05603 757.11165,-196.53562 757.71875,-197.1875 C 759.5456,-197.32525 761.2895,-197.68073 762.65625,-198.1875 C 765.62437,-199.28802 767.53162,-199.99369 769.4375,-200.65625 C 771.34336,-201.31879 771.79159,-202.07112 772.84375,-202.4375 C 773.9353,-202.81761 775.03886,-202.60288 778.90625,-203.96875 C 782.7737,-205.33461 784.18941,-205.79583 784.875,-206.1875 C 785.57609,-206.58802 786.57581,-207.12048 787.1875,-207.78125 C 789.1583,-207.83591 791.00435,-208.16588 792.46875,-208.65625 C 795.47023,-209.66133 797.3949,-210.27796 799.3125,-210.90625 C 800.8511,-211.41035 801.48652,-211.95302 802.21875,-212.34375 C 802.44891,-212.47806 802.69449,-212.59748 802.96875,-212.6875 C 804.06698,-213.04798 805.1502,-212.76887 809.0625,-214 C 812.97483,-215.23113 814.42855,-215.67295 815.125,-216.03125 C 815.81888,-216.38822 816.75515,-216.82386 817.375,-217.4375 C 819.24021,-217.46016 821.01081,-217.70433 822.40625,-218.125 C 825.43668,-219.03854 827.39863,-219.5551 829.34375,-220.09375 C 831.28886,-220.63239 831.76993,-221.35827 832.84375,-221.65625 C 833.95776,-221.9654 835.06369,-221.64886 839.03125,-222.6875 C 842.99886,-223.72613 844.44883,-224.12023 845.15625,-224.4375 C 845.89112,-224.76709 846.97008,-225.19122 847.59375,-225.8125 C 849.59149,-225.6965 851.45118,-225.83259 852.9375,-226.1875 C 856.01561,-226.9225 858.02094,-227.28844 860,-227.6875 C 861.58792,-228.00768 862.24429,-228.47805 863,-228.78125 C 863.23757,-228.88805 863.46695,-228.97401 863.75,-229.03125 C 864.88347,-229.26044 866.05448,-228.82232 870.09375,-229.53125 C 874.13308,-230.24018 875.594,-230.45834 876.3125,-230.71875 C 877.02836,-230.97819 878.01678,-231.28599 878.65625,-231.8125 C 880.58052,-231.57301 882.40413,-231.58797 883.84375,-231.8125 C 886.97008,-232.30012 888.9983,-232.51317 891,-232.78125 C 893.00171,-233.04932 893.48869,-233.72639 894.59375,-233.875 C 895.74014,-234.02918 896.86967,-233.57343 900.9375,-234.09375 C 905.00534,-234.61407 906.49763,-234.78948 907.21875,-235.03125 C 907.95585,-235.27839 909.01684,-235.61748 909.65625,-236.15625 C 911.70632,-235.82072 913.63003,-235.75829 915.15625,-235.9375 C 918.29856,-236.30646 920.33619,-236.49686 922.34375,-236.71875 C 923.95451,-236.89677 924.60842,-237.32695 925.375,-237.5625 C 925.61594,-237.64802 925.86912,-237.7181 926.15625,-237.75 C 927.30603,-237.87772 928.45754,-237.40335 932.53125,-237.875 C 936.60499,-238.34665 938.09034,-238.4856 938.8125,-238.71875 C 939.53196,-238.95102 940.51274,-239.19221 941.15625,-239.6875 C 943.09262,-239.35404 944.92631,-239.28326 946.375,-239.4375 C 949.52102,-239.77245 951.55256,-239.95609 953.5625,-240.15625 C 955.57246,-240.35639 956.04664,-240.98264 957.15625,-241.09375 C 958.30739,-241.20903 959.45268,-240.72869 963.53125,-241.15625 C 967.60986,-241.58381 969.12011,-241.71834 969.84375,-241.9375 C 970.5829,-242.16136 971.63947,-242.45075 972.28125,-242.96875 C 974.33835,-242.57008 976.28312,-242.47535 977.8125,-242.625 C 980.96123,-242.9331 982.98834,-243.09825 985,-243.28125 C 986.61407,-243.42807 987.2631,-243.8418 988.03125,-244.0625 C 988.27267,-244.14336 988.52478,-244.19241 988.8125,-244.21875 C 989.96461,-244.3242 991.10546,-243.826 995.1875,-244.21875 C 999.26958,-244.6115 1000.7764,-244.74959 1001.5,-244.96875 C 1002.2209,-245.18708 1003.1997,-245.41645 1003.8438,-245.90625 C 1005.7818,-245.55626 1007.6126,-245.45187 1009.0625,-245.59375 C 1012.2112,-245.90185 1014.2383,-246.067 1016.25,-246.25 C 1018.2616,-246.43299 1018.7642,-247.08802 1019.875,-247.1875 C 1021.0273,-247.29073 1022.1672,-246.80267 1026.25,-247.1875 C 1030.3329,-247.57232 1031.8387,-247.6885 1032.5625,-247.90625 C 1033.3018,-248.12868 1034.3581,-248.42074 1035,-248.9375 C 1037.0574,-248.53573 1039.0029,-248.43417 1040.5313,-248.59375 C 1043.6779,-248.92227 1045.7084,-249.11645 1047.7188,-249.3125 C 1049.3318,-249.46979 1049.9844,-249.94398 1050.75,-250.1875 C 1050.9907,-250.27554 1051.2132,-250.30887 1051.5,-250.34375 C 1052.6483,-250.48345 1053.8167,-250.00384 1057.875,-250.59375 C 1061.9333,-251.18367 1063.4368,-251.37089 1064.1563,-251.625 C 1064.873,-251.87816 1065.8308,-252.18307 1066.4688,-252.71875 C 1068.3885,-252.50681 1070.1887,-252.56734 1071.625,-252.8125 C 1074.7441,-253.3449 1076.7366,-253.74111 1078.7188,-254.125 C 1080.7009,-254.50887 1081.1931,-255.16465 1082.2813,-255.40625 C 1083.4101,-255.65691 1084.5516,-255.28996 1088.5313,-256.28125 C 1092.5109,-257.27253 1093.9609,-257.70055 1094.6563,-258.0625 C 1095.3786,-258.43851 1096.4182,-258.93308 1097.0313,-259.59375 C 1098.9943,-259.6058 1100.825,-259.8848 1102.25,-260.4375 C 1105.2012,-261.58211 1107.1232,-262.30692 1108.9375,-263.1875 C 1110.3932,-263.89403 1111.2723,-264.87391 1111.4844,-265.17188 C 1111.6966,-265.46984 1111.5962,-265.91718 1111.6223,-265.93863 C 1111.6652,-265.97387 1111.9416,-266.0236 1112.1013,-266.36707 C 1112.9602,-268.21415 1114.4223,-272.01166 1114.5365,-272.69652 C 1114.6502,-273.37868 1114.7003,-274.04426 1114.751,-274.44149 C 1114.7804,-274.67101 1114.6043,-275.30693 1114.6264,-275.36553 C 1114.6573,-275.44759 1114.9309,-275.63081 1114.9863,-275.88024 C 1115.2526,-277.07857 1115.0752,-278.07153 1114.8612,-279.48917 C 1114.6472,-280.90681 1113.8775,-284.11131 1113.2243,-284.96543 C 1112.5654,-285.82715 1112.0014,-285.9766 1111.4764,-285.96609 C 1111.2678,-285.69633 1111.6132,-285.703 1111.639,-285.65348 C 1112.3196,-285.60269 1112.573,-285.28484 1113.0582,-284.75686 C 1113.5434,-284.22888 1114.501,-280.8173 1114.6376,-279.36691 C 1114.7742,-277.91652 1114.8276,-276.50671 1114.5496,-275.89827 C 1114.2715,-275.28982 1113.6054,-275.46963 1113.313,-275.40375 C 1113.844,-275.21786 1114.2038,-275.19053 1114.2654,-274.34607 C 1114.3247,-273.53269 1114.1322,-272.70638 1113.7456,-271.54045 C 1113.3544,-270.36044 1111.9004,-267.19047 1111.4599,-266.94168 C 1111.0076,-266.68617 1110.5075,-266.75969 1110.1719,-266.89063 z"
2421 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6997);enable-background:new"
2422 sodipodi:nodetypes="cssscscsscsssccscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssccscsscscssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsszsszssszzcczzzczzzc" />
2423 <path
2424 id="path8103"
2425 d="M 988.75,-263.84375 C 990.66161,-263.20935 993.30027,-262.08534 994.875,-261.03125 C 996.44977,-259.97716 997.7711,-259.54873 1000.4063,-257.65625 C 1003.0145,-255.78311 1005.4332,-254.64103 1007.7188,-253.59375 C 1010.1881,-252.46228 1013.4709,-251.43901 1017.25,-249.65625 C 1016.0428,-250.91465 1010.111,-253.0207 1008.2188,-253.84375 C 1006.3266,-254.66679 1004.0908,-255.77424 1001.2813,-257.625 C 998.47169,-259.47575 997.65906,-260.10654 995.28125,-261.34375 C 992.90343,-262.58094 991.20137,-263.29295 988.75,-263.84375 z"
2426 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6953);enable-background:new" />
2427 <path
2428 id="path8105"
2429 d="M 957.5,-260.78125 C 959.41,-260.16315 962.08288,-259.07191 963.65625,-258.03125 C 965.22964,-256.99059 966.55233,-256.54873 969.1875,-254.65625 C 971.79573,-252.7831 974.21442,-251.64104 976.5,-250.59375 C 978.96931,-249.46228 982.25213,-248.439 986.03125,-246.65625 C 984.82397,-247.91465 978.82971,-250.05195 976.9375,-250.875 C 975.04533,-251.69804 972.84084,-252.8055 970.03125,-254.65625 C 967.22167,-256.507 966.4383,-257.09557 964.0625,-258.3125 C 961.68672,-259.52941 959.94929,-260.25135 957.5,-260.78125 z"
2430 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6993);enable-background:new" />
2431 <path
2432 id="path8107"
2433 d="M 926.09375,-257.375 C 928.00147,-256.77755 930.64723,-255.71116 932.21875,-254.6875 C 933.79025,-253.66385 935.08897,-253.24779 937.71875,-251.40625 C 940.32166,-249.58352 942.74762,-248.43405 945.03125,-247.40625 C 947.49845,-246.29584 950.7866,-245.31302 954.5625,-243.5625 C 953.35627,-244.8106 947.3906,-246.88059 945.5,-247.6875 C 943.60942,-248.4944 941.39758,-249.57854 938.59375,-251.375 C 935.7899,-253.17144 934.96671,-253.77751 932.59375,-254.96875 C 930.22078,-256.15999 928.54013,-256.87158 926.09375,-257.375 z"
2434 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6989);enable-background:new" />
2435 <path
2436 id="path8109"
2437 d="M 894.90625,-253.5625 C 896.80838,-253.00895 899.49326,-251.97363 901.0625,-250.96875 C 902.63173,-249.96388 903.93651,-249.56011 906.5625,-247.75 C 909.16162,-245.95836 911.56284,-244.87811 913.84375,-243.875 C 916.30803,-242.79126 919.60359,-241.83471 923.375,-240.125 C 922.1702,-241.36007 916.20084,-243.36978 914.3125,-244.15625 C 912.42418,-244.94272 910.2373,-245.98705 907.4375,-247.75 C 904.63773,-249.51294 903.83831,-250.11836 901.46875,-251.28125 C 899.09918,-252.44413 897.3455,-253.11537 894.90625,-253.5625 z"
2438 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6985);enable-background:new" />
2439 <path
2440 id="path8111"
2441 d="M 863.71875,-248.65625 C 865.59937,-248.22716 868.22302,-247.27587 869.78125,-246.34375 C 871.33948,-245.41164 872.63358,-245.08599 875.25,-243.34375 C 877.83971,-241.61931 880.23067,-240.63573 882.5,-239.71875 C 884.95176,-238.72806 888.23959,-237.84168 892,-236.21875 C 890.79869,-237.42609 884.84751,-239.28484 882.96875,-240 C 881.09,-240.71517 878.88335,-241.68442 876.09375,-243.375 C 873.30412,-245.06557 872.50914,-245.60322 870.15625,-246.65625 C 867.80333,-247.70926 866.13041,-248.36873 863.71875,-248.65625 z"
2442 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6965);enable-background:new" />
2443 <path
2444 id="path8113"
2445 d="M 833.15625,-241.375 C 835.00461,-241.07856 837.6257,-240.39868 839.15625,-239.59375 C 840.68683,-238.78882 841.96999,-238.53802 844.53125,-237.0625 C 847.06629,-235.60204 849.42193,-234.73741 851.65625,-234 C 854.07024,-233.20332 857.31336,-232.53311 861.03125,-231.15625 C 859.84354,-232.28498 853.94353,-233.746 852.09375,-234.3125 C 850.24398,-234.879 848.09033,-235.68642 845.34375,-237.15625 C 842.59718,-238.62608 841.84239,-239.07653 839.53125,-239.9375 C 837.2201,-240.79845 835.52654,-241.25759 833.15625,-241.375 z"
2446 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6981);enable-background:new" />
2447 <path
2448 id="path8115"
2449 d="M 802.90625,-232.3125 C 804.72845,-232.10123 807.27201,-231.51193 808.78125,-230.78125 C 810.2905,-230.05059 811.53693,-229.85127 814.0625,-228.5 C 816.56226,-227.16254 818.89404,-226.45157 821.09375,-225.84375 C 823.47028,-225.18708 826.65839,-224.77087 830.3125,-223.65625 C 829.14515,-224.70121 823.38362,-225.75954 821.5625,-226.21875 C 819.74139,-226.67796 817.61025,-227.34571 814.90625,-228.65625 C 812.20222,-229.96677 811.43519,-230.37615 809.15625,-231.125 C 806.8773,-231.87383 805.243,-232.30431 802.90625,-232.3125 z"
2450 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6977);enable-background:new" />
2451 <path
2452 id="path8117"
2453 d="M 773.1875,-222.1875 C 774.99859,-222.0088 777.50809,-221.52244 779,-220.84375 C 780.49194,-220.16506 781.7534,-220.04553 784.25,-218.78125 C 786.72107,-217.52987 789.04005,-216.88511 791.21875,-216.34375 C 793.57262,-215.75887 796.71009,-215.44623 800.3125,-214.5 C 799.16166,-215.49116 793.45999,-216.2833 791.65625,-216.6875 C 789.85253,-217.0917 787.74072,-217.70866 785.0625,-218.9375 C 782.38432,-220.16634 781.65905,-220.54839 779.40625,-221.21875 C 777.15346,-221.88909 775.50998,-222.22107 773.1875,-222.1875 z"
2454 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6973);enable-background:new" />
2455 <path
2456 id="path8119"
2457 d="M 743.5625,-211.1875 C 745.35531,-211.05839 747.83563,-210.63785 749.3125,-210 C 750.7894,-209.36215 752.0286,-209.25844 754.5,-208.0625 C 756.94618,-206.87878 759.22054,-206.31584 761.375,-205.84375 C 763.70267,-205.33372 766.7946,-205.16311 770.375,-204.28125 C 769.23121,-205.25185 763.62741,-205.8719 761.84375,-206.21875 C 760.06008,-206.56559 757.9609,-207.10631 755.3125,-208.25 C 752.66409,-209.39368 751.91755,-209.76631 749.6875,-210.375 C 747.45742,-210.98368 745.86156,-211.28466 743.5625,-211.1875 z"
2458 style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter6969);enable-background:new" />
2459 <g
2460 id="g8121"
2461 style="fill:#ffffff;fill-opacity:1;filter:url(#filter7345)">
2462 <path
2463 sodipodi:nodetypes="czzzczzc"
2464 id="path8123"
2465 d="M 744.9375,-212.11731 C 744.9375,-212.11731 752.15979,-215.34049 754,-215.61731 C 755.84021,-215.89413 757.35225,-215.62054 760,-215.05481 C 762.64775,-214.48908 768.7357,-212.83963 771.1875,-211.67981 C 773.6393,-210.51999 776.5,-208.11731 776.5,-208.11731 C 776.5,-208.11731 769.35356,-210.8975 766.3125,-211.67981 C 763.27144,-212.46212 758.66789,-213.76355 755.9375,-213.99231 C 753.20711,-214.22107 744.9375,-212.11731 744.9375,-212.11731 z"
2466 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
2467 <path
2468 sodipodi:nodetypes="czzzczzc"
2469 id="path8125"
2470 d="M 735.46875,-206.95416 C 735.46875,-206.95416 739.12854,-209.17734 740.96875,-209.45416 C 742.80896,-209.73098 744.6335,-209.20739 747.28125,-208.64166 C 749.929,-208.07593 756.01695,-206.42648 758.46875,-205.26666 C 760.92055,-204.10684 765.03125,-203.14166 765.03125,-203.14166 C 765.03125,-203.14166 756.63481,-204.48435 753.59375,-205.26666 C 750.55269,-206.04897 745.63664,-207.6004 742.90625,-207.82916 C 740.17586,-208.05792 735.46875,-206.95416 735.46875,-206.95416 z"
2471 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2472 <path
2473 sodipodi:nodetypes="czzzczzc"
2474 id="path8127"
2475 d="M 759.85042,-217.61116 C 759.85042,-217.61116 768.39412,-220.90973 770.2482,-221.06902 C 772.10229,-221.22832 773.88986,-220.58982 776.4963,-219.85694 C 779.10274,-219.12406 785.07354,-217.091 787.44666,-215.77769 C 789.81978,-214.46438 793.86083,-213.23987 793.86083,-213.23987 C 793.86083,-213.23987 785.5667,-215.11352 782.58152,-216.08754 C 779.59633,-217.06156 774.78883,-218.92232 772.0785,-219.32416 C 769.36817,-219.726 759.85042,-217.61116 759.85042,-217.61116 z"
2476 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2477 <path
2478 sodipodi:nodetypes="czzzczzc"
2479 id="path8129"
2480 d="M 775.19813,-223.2266 C 775.19813,-223.2266 782.96946,-226.00904 784.82644,-226.13009 C 786.68341,-226.25113 788.45744,-225.57592 791.04822,-224.78947 C 793.63899,-224.00302 799.56662,-221.8473 801.91216,-220.48535 C 804.25771,-219.1234 808.27265,-217.81585 808.27265,-217.81585 C 808.27265,-217.81585 800.01892,-219.86008 797.05444,-220.89543 C 794.08997,-221.93078 789.32185,-223.89024 786.62038,-224.34786 C 783.91891,-224.80549 775.19813,-223.2266 775.19813,-223.2266 z"
2481 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2482 <path
2483 inkscape:transform-center-y="-4.3190906"
2484 inkscape:transform-center-x="13.852145"
2485 sodipodi:nodetypes="czzzczzc"
2486 id="path8131"
2487 d="M 789.64298,-227.95417 C 789.64298,-227.95417 798.32554,-231.47448 800.18452,-231.55952 C 802.04349,-231.64455 803.8041,-230.9351 806.37915,-230.09859 C 808.9542,-229.2621 814.83894,-226.99193 817.15766,-225.58479 C 819.47638,-224.17764 823.46523,-222.79255 823.46523,-222.79255 C 823.46523,-222.79255 815.25266,-224.99632 812.3088,-226.08891 C 809.36494,-227.1815 804.63568,-229.23299 801.94358,-229.74288 C 799.25149,-230.25276 789.64298,-227.95417 789.64298,-227.95417 z"
2488 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2489 <path
2490 inkscape:transform-center-y="-4.3190906"
2491 inkscape:transform-center-x="13.852145"
2492 sodipodi:nodetypes="czzzczzc"
2493 id="path8133"
2494 d="M 804.49513,-233.32948 C 804.49513,-233.32948 812.30269,-235.91229 814.16167,-235.99733 C 816.02064,-236.08236 817.78125,-235.37291 820.3563,-234.5364 C 822.93135,-233.69991 828.81609,-231.42974 831.13481,-230.0226 C 833.45353,-228.61545 837.44238,-227.23036 837.44238,-227.23036 C 837.44238,-227.23036 829.22981,-229.43413 826.28595,-230.52672 C 823.34209,-231.61931 818.61283,-233.6708 815.92073,-234.18069 C 813.22864,-234.69057 804.49513,-233.32948 804.49513,-233.32948 z"
2495 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2496 <path
2497 inkscape:transform-center-y="-4.3190906"
2498 inkscape:transform-center-x="13.852145"
2499 sodipodi:nodetypes="czzzczzc"
2500 id="path8135"
2501 d="M 819.55763,-237.57948 C 819.55763,-237.57948 828.11519,-240.16229 829.97417,-240.24733 C 831.83314,-240.33236 833.59375,-239.62291 836.1688,-238.7864 C 838.74385,-237.94991 844.62859,-235.67974 846.94731,-234.2726 C 849.26603,-232.86545 853.25488,-231.48036 853.25488,-231.48036 C 853.25488,-231.48036 845.04231,-233.68413 842.09845,-234.77672 C 839.15459,-235.86931 834.42533,-237.9208 831.73323,-238.43069 C 829.04114,-238.94057 819.55763,-237.57948 819.55763,-237.57948 z"
2502 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2503 <path
2504 inkscape:transform-center-y="-4.9269042"
2505 inkscape:transform-center-x="13.64141"
2506 sodipodi:nodetypes="czzzczzc"
2507 id="path8137"
2508 d="M 836.23395,-242.60125 C 836.23395,-242.60125 843.20097,-244.58848 845.06179,-244.56882 C 846.9226,-244.54915 848.64052,-243.7418 851.16444,-242.76177 C 853.68837,-241.78177 859.4361,-239.18419 861.672,-237.64886 C 863.9079,-236.11351 867.81253,-234.50625 867.81253,-234.50625 C 867.81253,-234.50625 859.73692,-237.16847 856.85917,-238.42491 C 853.98143,-239.68136 849.37505,-241.99561 846.71589,-242.65612 C 844.05674,-243.31661 836.23395,-242.60125 836.23395,-242.60125 z"
2509 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2510 <path
2511 inkscape:transform-center-y="-5.1542119"
2512 inkscape:transform-center-x="13.55068"
2513 sodipodi:nodetypes="czzzczzc"
2514 id="path8139"
2515 d="M 850.73028,-246.00461 C 850.73028,-246.00461 858.41812,-248.03229 860.2781,-247.97315 C 862.13807,-247.914 863.83848,-247.07036 866.34103,-246.03699 C 868.84358,-245.00365 874.5349,-242.28467 876.73771,-240.70224 C 878.94053,-239.11979 882.81016,-237.43004 882.81016,-237.43004 C 882.81016,-237.43004 874.79287,-240.26302 871.94244,-241.58026 C 869.09201,-242.89749 864.53578,-245.30898 861.89124,-246.02576 C 859.2467,-246.74254 850.73028,-246.00461 850.73028,-246.00461 z"
2516 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2517 <path
2518 inkscape:transform-center-y="-5.4740887"
2519 inkscape:transform-center-x="13.41151"
2520 sodipodi:nodetypes="czzzczzc"
2521 id="path8141"
2522 d="M 864.82496,-249.21081 C 864.82496,-249.21081 872.99448,-251.17987 874.85184,-251.06477 C 876.70919,-250.94965 878.38342,-250.05521 880.85374,-248.94698 C 883.32405,-247.83877 888.93094,-244.94971 891.08512,-243.30167 C 893.2393,-241.65363 897.05632,-239.84815 897.05632,-239.84815 C 897.05632,-239.84815 889.12793,-242.92121 886.31845,-244.32365 C 883.50896,-245.72609 879.02739,-248.27364 876.40562,-249.06971 C 873.78386,-249.86577 864.82496,-249.21081 864.82496,-249.21081 z"
2523 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2524 <path
2525 inkscape:transform-center-y="-5.79376"
2526 inkscape:transform-center-x="13.258805"
2527 sodipodi:nodetypes="czzzczzc"
2528 id="path8143"
2529 d="M 881.38485,-251.60282 C 881.38485,-251.60282 889.47021,-253.51091 891.32322,-253.33946 C 893.17622,-253.16799 894.82252,-252.22313 897.25804,-251.04038 C 899.69357,-249.85767 905.21013,-246.79968 907.31327,-245.08699 C 909.41641,-243.37429 913.17684,-241.45373 913.17684,-241.45373 C 913.17684,-241.45373 905.34544,-244.76613 902.57984,-246.25323 C 899.81423,-247.74035 895.41209,-250.42282 892.8157,-251.29814 C 890.21933,-252.17345 881.38485,-251.60282 881.38485,-251.60282 z"
2530 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2531 <path
2532 inkscape:transform-center-y="-5.7433893"
2533 inkscape:transform-center-x="13.28378"
2534 sodipodi:nodetypes="czzzczzc"
2535 id="path8145"
2536 d="M 896.58415,-254.34724 C 896.58415,-254.34724 904.22581,-255.77494 906.07962,-255.61239 C 907.93342,-255.44983 909.58424,-254.51289 912.02541,-253.34186 C 914.46659,-252.17086 919.99779,-249.1394 922.10913,-247.43684 C 924.22047,-245.73426 927.99009,-243.83179 927.99009,-243.83179 C 927.99009,-243.83179 920.14286,-247.10653 917.37014,-248.58034 C 914.59743,-250.05414 910.18245,-252.71543 907.58189,-253.57827 C 904.98134,-254.44109 896.58415,-254.34724 896.58415,-254.34724 z"
2537 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2538 <path
2539 inkscape:transform-center-y="-5.7433893"
2540 inkscape:transform-center-x="13.28378"
2541 sodipodi:nodetypes="czzzczzc"
2542 id="path8147"
2543 d="M 911.45328,-255.98544 C 911.45328,-255.98544 920.09494,-257.53814 921.94875,-257.37559 C 923.80255,-257.21303 925.45337,-256.27609 927.89454,-255.10506 C 930.33572,-253.93406 935.86692,-250.9026 937.97826,-249.20004 C 940.0896,-247.49746 943.85922,-245.59499 943.85922,-245.59499 C 943.85922,-245.59499 936.01199,-248.86973 933.23927,-250.34354 C 930.46656,-251.81734 926.05158,-254.47863 923.45102,-255.34147 C 920.85047,-256.20429 911.45328,-255.98544 911.45328,-255.98544 z"
2544 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2545 <path
2546 inkscape:transform-center-y="-5.7433893"
2547 inkscape:transform-center-x="13.28378"
2548 sodipodi:nodetypes="czzzczzc"
2549 id="path8149"
2550 d="M 927.70328,-258.29794 C 927.70328,-258.29794 935.34494,-259.16314 937.19875,-259.00059 C 939.05255,-258.83803 940.70337,-257.90109 943.14454,-256.73006 C 945.58572,-255.55906 951.11692,-252.5276 953.22826,-250.82504 C 955.3396,-249.12246 959.10922,-247.21999 959.10922,-247.21999 C 959.10922,-247.21999 951.26199,-250.49473 948.48927,-251.96854 C 945.71656,-253.44234 941.30158,-256.10363 938.70102,-256.96647 C 936.10047,-257.82929 927.70328,-258.29794 927.70328,-258.29794 z"
2551 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2552 <path
2553 inkscape:transform-center-y="-5.7433893"
2554 inkscape:transform-center-x="13.28378"
2555 sodipodi:nodetypes="czzzczzc"
2556 id="path8151"
2557 d="M 942.82828,-259.48544 C 942.82828,-259.48544 951.40744,-260.97564 953.26125,-260.81309 C 955.11505,-260.65053 956.76587,-259.71359 959.20704,-258.54256 C 961.64822,-257.37156 967.17942,-254.3401 969.29076,-252.63754 C 971.4021,-250.93496 975.17172,-249.03249 975.17172,-249.03249 C 975.17172,-249.03249 967.32449,-252.30723 964.55177,-253.78104 C 961.77906,-255.25484 957.36408,-257.91613 954.76352,-258.77897 C 952.16297,-259.64179 942.82828,-259.48544 942.82828,-259.48544 z"
2558 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2559 <path
2560 inkscape:transform-center-y="-5.7433893"
2561 inkscape:transform-center-x="13.28378"
2562 sodipodi:nodetypes="czzzczzc"
2563 id="path8153"
2564 d="M 959.07828,-261.54794 C 959.07828,-261.54794 966.90744,-262.35064 968.76125,-262.18809 C 970.61505,-262.02553 972.26587,-261.08859 974.70704,-259.91756 C 977.14822,-258.74656 982.67942,-255.7151 984.79076,-254.01254 C 986.9021,-252.30996 990.67172,-250.40749 990.67172,-250.40749 C 990.67172,-250.40749 982.82449,-253.68223 980.05177,-255.15604 C 977.27906,-256.62984 972.86408,-259.29113 970.26352,-260.15397 C 967.66297,-261.01679 959.07828,-261.54794 959.07828,-261.54794 z"
2565 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2566 <path
2567 inkscape:transform-center-y="-5.7433893"
2568 inkscape:transform-center-x="13.28378"
2569 sodipodi:nodetypes="czzzczzc"
2570 id="path8155"
2571 d="M 974.45328,-262.79794 C 974.45328,-262.79794 982.84494,-263.97564 984.69875,-263.81309 C 986.55255,-263.65053 988.20337,-262.71359 990.64454,-261.54256 C 993.08572,-260.37156 998.61692,-257.3401 1000.7283,-255.63754 C 1002.8396,-253.93496 1006.6092,-252.03249 1006.6092,-252.03249 C 1006.6092,-252.03249 998.76199,-255.30723 995.98927,-256.78104 C 993.21656,-258.25484 988.80158,-260.91613 986.20102,-261.77897 C 983.60047,-262.64179 974.45328,-262.79794 974.45328,-262.79794 z"
2572 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2573 <path
2574 inkscape:transform-center-y="-5.7433893"
2575 inkscape:transform-center-x="13.28378"
2576 sodipodi:nodetypes="czzzczzc"
2577 id="path8157"
2578 d="M 990.64078,-264.86044 C 990.64078,-264.86044 997.53244,-265.85064 999.38625,-265.68809 C 1001.2401,-265.52553 1002.8909,-264.58859 1005.332,-263.41756 C 1007.7732,-262.24656 1013.3044,-259.2151 1015.4158,-257.51254 C 1017.5271,-255.80996 1021.2967,-253.90749 1021.2967,-253.90749 C 1021.2967,-253.90749 1013.4495,-257.18223 1010.6768,-258.65604 C 1007.9041,-260.12984 1003.4891,-262.79113 1000.8885,-263.65397 C 998.28797,-264.51679 990.64078,-264.86044 990.64078,-264.86044 z"
2579 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2580 <path
2581 inkscape:transform-center-y="-5.7433893"
2582 inkscape:transform-center-x="13.28378"
2583 sodipodi:nodetypes="czzzczzc"
2584 id="path8159"
2585 d="M 1007.7658,-265.79794 C 1007.7658,-265.79794 1014.5949,-266.97564 1016.4488,-266.81309 C 1018.3026,-266.65053 1019.9534,-265.71359 1022.3945,-264.54256 C 1024.8357,-263.37156 1030.3669,-260.3401 1032.4783,-258.63754 C 1034.5896,-256.93496 1038.3592,-255.03249 1038.3592,-255.03249 C 1038.3592,-255.03249 1030.512,-258.30723 1027.7393,-259.78104 C 1024.9666,-261.25484 1020.5516,-263.91613 1017.951,-264.77897 C 1015.3505,-265.64179 1007.7658,-265.79794 1007.7658,-265.79794 z"
2586 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2587 <path
2588 inkscape:transform-center-y="-5.7433893"
2589 inkscape:transform-center-x="13.28378"
2590 sodipodi:nodetypes="czzzczzc"
2591 id="path8161"
2592 d="M 1023.8908,-267.79794 C 1023.8908,-267.79794 1029.9699,-268.22564 1031.8238,-268.06309 C 1033.6776,-267.90053 1035.3284,-266.96359 1037.7695,-265.79256 C 1040.2107,-264.62156 1045.7419,-261.5901 1047.8533,-259.88754 C 1049.9646,-258.18496 1053.7342,-256.28249 1053.7342,-256.28249 C 1053.7342,-256.28249 1045.887,-259.55723 1043.1143,-261.03104 C 1040.3416,-262.50484 1035.9266,-265.16613 1033.326,-266.02897 C 1030.7255,-266.89179 1023.8908,-267.79794 1023.8908,-267.79794 z"
2593 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2594 <path
2595 inkscape:transform-center-y="-5.7433893"
2596 inkscape:transform-center-x="13.28378"
2597 sodipodi:nodetypes="czzzczzc"
2598 id="path8163"
2599 d="M 1039.7033,-269.17294 C 1039.7033,-269.17294 1046.1574,-269.85064 1048.0113,-269.68809 C 1049.8651,-269.52553 1051.5159,-268.58859 1053.957,-267.41756 C 1056.3982,-266.24656 1061.9294,-263.2151 1064.0408,-261.51254 C 1066.1521,-259.80996 1069.9217,-257.90749 1069.9217,-257.90749 C 1069.9217,-257.90749 1062.0745,-261.18223 1059.3018,-262.65604 C 1056.5291,-264.12984 1052.1141,-266.79113 1049.5135,-267.65397 C 1046.913,-268.51679 1039.7033,-269.17294 1039.7033,-269.17294 z"
2600 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2601 <path
2602 inkscape:transform-center-y="-5.1360724"
2603 inkscape:transform-center-x="13.55813"
2604 sodipodi:nodetypes="czzzczzc"
2605 id="path8165"
2606 d="M 1055.2718,-271.03319 C 1055.2718,-271.03319 1060.7694,-271.94264 1062.6296,-271.88667 C 1064.4897,-271.83067 1066.1915,-270.98993 1068.6957,-269.96081 C 1071.2001,-268.93171 1076.896,-266.22241 1079.1015,-264.64372 C 1081.307,-263.06501 1085.1795,-261.38182 1085.1795,-261.38182 C 1085.1795,-261.38182 1077.1575,-264.20121 1074.3047,-265.5136 C 1071.4521,-266.82598 1066.8918,-269.22973 1064.246,-269.94203 C 1061.6003,-270.65431 1055.2718,-271.03319 1055.2718,-271.03319 z"
2607 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2608 <path
2609 inkscape:transform-center-y="-4.6370147"
2610 inkscape:transform-center-x="13.74758"
2611 sodipodi:nodetypes="czzzczzc"
2612 id="path8167"
2613 d="M 1072.7007,-273.48537 C 1072.7007,-273.48537 1077.2479,-274.64118 1079.1087,-274.67158 C 1080.9694,-274.70196 1082.7083,-273.94109 1085.2576,-273.02927 C 1087.807,-272.1175 1093.6225,-269.67541 1095.899,-268.20077 C 1098.1753,-266.72609 1102.1217,-265.22441 1102.1217,-265.22441 C 1102.1217,-265.22441 1093.9775,-267.66852 1091.067,-268.84713 C 1088.1565,-270.02573 1083.4896,-272.21528 1080.8136,-272.80404 C 1078.1377,-273.39279 1072.7007,-273.48537 1072.7007,-273.48537 z"
2614 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2615 <path
2616 inkscape:transform-center-y="-4.4842392"
2617 inkscape:transform-center-x="13.79933"
2618 sodipodi:nodetypes="czzzczzc"
2619 id="path8169"
2620 d="M 1087.1585,-276.5244 C 1087.1585,-276.5244 1093.1185,-278.29795 1094.9787,-278.35464 C 1096.8387,-278.41131 1098.5883,-277.67509 1101.1502,-276.79939 C 1103.7122,-275.92373 1103.6728,-275.94226 1106.4837,-275.30924 C 1109.2806,-274.67938 1113.5604,-273.79611 1113.5604,-273.79611 C 1113.5604,-273.79611 1109.9449,-273.81239 1106.7681,-274.26225 C 1103.6526,-274.70344 1099.3938,-275.9605 1096.7097,-276.51138 C 1094.0258,-277.06226 1087.1585,-276.5244 1087.1585,-276.5244 z"
2621 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new" />
2622 <path
2623 sodipodi:nodetypes="czczc"
2624 id="path8171"
2625 d="M 1099.25,-279.92981 C 1099.4112,-279.66119 1110.4581,-284.53027 1111.4375,-284.61731 C 1112.4169,-284.70435 1113.4375,-281.49231 1113.4375,-281.49231 C 1113.4375,-281.49231 1112.6624,-282.99665 1110.5625,-282.55481 C 1108.4626,-282.11297 1099.2616,-279.8834 1099.25,-279.92981 z"
2626 style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
2627 </g>
2628 <path
2629 id="path8173"
2630 d="M 1107.4532,-284.0938 C 1107.0345,-283.88097 1107.2976,-283.99991 1106.806,-283.78799 C 1106.3199,-283.57845 1105.0826,-283.2136 1102.7264,-282.32904 C 1099.3953,-281.07847 1096.8962,-280.1756 1095.7005,-279.26294 C 1094.1644,-279.24168 1091.98,-279.02963 1090.0442,-278.54419 C 1087.0627,-277.79653 1085.189,-277.37018 1083.2942,-276.95044 C 1081.3994,-276.53072 1081.6187,-276.30825 1080.4192,-276.07544 C 1079.1226,-275.82378 1078.6978,-276.08443 1074.9817,-275.29419 C 1071.4918,-274.55205 1066.0869,-273.36312 1064.8255,-272.60669 C 1063.2416,-272.78748 1060.958,-272.92847 1058.9817,-272.63794 C 1055.9413,-272.19099 1054.0655,-271.96518 1052.138,-271.73169 C 1051.4826,-271.65232 1051.0969,-271.53091 1050.7942,-271.45044 C 1050.368,-271.31879 1050.1084,-271.19042 1049.4192,-271.10669 C 1048.1076,-270.94733 1047.657,-271.26352 1043.888,-270.82544 C 1040.3341,-270.41236 1034.8826,-269.55262 1033.638,-268.88794 C 1032.039,-269.18524 1029.7802,-269.42213 1027.7942,-269.23169 C 1024.7354,-268.93838 1022.8222,-268.7477 1020.888,-268.57544 C 1018.9537,-268.40318 1019.1993,-268.15307 1017.9817,-268.04419 C 1016.6655,-267.9265 1016.2219,-268.20782 1012.4505,-267.79419 C 1008.9086,-267.40575 1003.4426,-266.58492 1002.1692,-265.91919 C 1000.5703,-266.21389 998.28202,-266.4262 996.29419,-266.23169 C 993.23595,-265.93245 991.35306,-265.75145 989.41919,-265.57544 C 988.7617,-265.5156 988.37915,-265.39688 988.07544,-265.32544 C 987.64779,-265.20649 987.39193,-265.10737 986.70044,-265.04419 C 985.38448,-264.92394 984.94068,-265.23907 981.16919,-264.82544 C 977.61305,-264.43543 972.16365,-263.58628 970.91919,-262.91919 C 969.32056,-263.21338 967.05935,-263.44291 965.07544,-263.23169 C 962.01984,-262.90637 960.1014,-262.70545 958.16919,-262.51294 C 956.23698,-262.32044 956.47932,-262.07206 955.26294,-261.95044 C 953.94806,-261.81898 953.49996,-262.11498 949.73169,-261.66919 C 946.19282,-261.25054 940.75401,-260.37702 939.48169,-259.70044 C 937.88414,-259.98149 935.62173,-260.12087 933.63794,-259.88794 C 930.58596,-259.52958 928.69286,-259.32008 926.76294,-259.10669 C 926.10674,-259.03414 925.72228,-258.934 925.41919,-258.85669 C 924.99242,-258.72947 924.73428,-258.58949 924.04419,-258.51294 C 922.73086,-258.36726 922.27673,-258.68696 918.51294,-258.20044 C 914.96405,-257.74169 909.53431,-256.78142 908.29419,-256.07544 C 906.70114,-256.31968 904.46038,-256.45679 902.48169,-256.20044 C 899.4341,-255.80563 897.53098,-255.55199 895.60669,-255.29419 C 893.68241,-255.0364 893.88058,-254.80066 892.66919,-254.63794 C 891.35973,-254.46204 890.922,-254.74232 887.16919,-254.16919 C 883.6449,-253.63095 878.24604,-252.47002 876.98169,-251.73169 C 875.39419,-251.93523 873.13619,-251.98642 871.16919,-251.63794 C 868.14302,-251.10182 866.2703,-250.77625 864.35669,-250.45044 C 863.70608,-250.33967 863.34298,-250.1795 863.04419,-250.07544 C 862.62352,-249.91056 862.38074,-249.74414 861.70044,-249.60669 C 860.40579,-249.3451 859.97332,-249.61289 856.26294,-248.79419 C 852.76441,-248.02224 847.41699,-246.41126 846.20044,-245.57544 C 844.63766,-245.65289 842.44286,-245.49016 840.51294,-244.98169 C 837.5405,-244.19856 835.69533,-243.7496 833.82544,-243.23169 C 831.95556,-242.71379 832.15884,-242.46441 830.98169,-242.13794 C 829.70923,-241.78504 829.28466,-242.03085 825.63794,-240.95044 C 822.21324,-239.93581 816.9885,-238.01727 815.76294,-237.10669 C 814.22411,-237.09395 812.04311,-236.83447 810.13794,-236.23169 C 807.20688,-235.30435 805.38763,-234.77327 803.54419,-234.16919 C 802.91743,-233.96381 802.55246,-233.77661 802.26294,-233.63794 C 801.85531,-233.42433 801.6096,-233.22919 800.95044,-233.01294 C 799.69598,-232.6014 799.26433,-232.8239 795.66919,-231.57544 C 792.27934,-230.39827 787.07421,-228.36174 785.88794,-227.41919 C 784.36405,-227.35952 782.23789,-227.02432 780.35669,-226.35669 C 777.4593,-225.3284 775.65761,-224.68121 773.82544,-224.04419 C 771.99327,-223.40718 772.19759,-223.19565 771.04419,-222.79419 C 769.79741,-222.36023 769.38058,-222.59447 765.82544,-221.23169 C 762.48677,-219.95189 757.33829,-217.74914 756.13794,-216.76294 C 754.63076,-216.65525 752.50225,-216.26414 750.63794,-215.54419 C 747.76976,-214.43659 746.01414,-213.76263 744.20044,-213.07544 C 743.58378,-212.84181 743.20403,-212.63341 742.91919,-212.48169 C 742.91919,-212.48169 742.91919,-211.38794 742.91919,-211.38794 C 743.03097,-211.6103 743.30518,-212.20537 743.82544,-212.48169 C 744.52341,-212.85241 748.63907,-214.47506 750.63794,-215.20044 C 752.2948,-215.80169 754.79183,-216.52912 756.60669,-216.51294 C 756.90831,-216.51025 757.19431,-216.46204 757.45044,-216.41919 C 759.29293,-216.11094 764.91919,-214.85669 764.91919,-214.85669 C 764.91918,-214.85669 758.6857,-216.50344 757.88794,-216.70044 C 757.69715,-216.74755 757.35222,-216.76916 756.91919,-216.76294 C 758.06465,-217.63265 761.68019,-219.15645 764.26294,-220.20044 C 767.10116,-221.34771 767.37975,-221.45226 769.32544,-221.85669 C 771.33374,-222.27413 772.48169,-222.35669 772.48169,-222.35669 C 772.48169,-222.35668 772.39933,-222.95783 773.45044,-223.48169 C 774.15554,-223.8331 778.33746,-225.37409 780.35669,-226.04419 C 782.30859,-226.69192 785.41409,-227.40269 787.23169,-227.04419 C 789.09492,-226.67669 794.76294,-225.23169 794.76294,-225.23169 C 794.76295,-225.23169 788.47594,-227.1028 787.66919,-227.32544 C 787.47627,-227.37869 787.13835,-227.41148 786.70044,-227.41919 C 787.85878,-228.25207 791.49488,-229.61451 794.10669,-230.57544 C 796.97685,-231.63145 797.27403,-231.73724 799.23169,-232.10669 C 801.08518,-232.45648 802.09053,-232.53217 802.26294,-232.54419 C 802.37654,-232.76143 802.64039,-233.35421 803.16919,-233.60669 C 803.87863,-233.94543 808.09526,-235.31944 810.13794,-235.91919 C 811.83111,-236.4163 814.37871,-236.95596 816.23169,-236.82544 C 816.53964,-236.80376 816.84518,-236.72818 817.10669,-236.66919 C 818.98787,-236.24487 824.70044,-234.63794 824.70044,-234.63794 C 824.70045,-234.63794 818.3587,-236.70319 817.54419,-236.95044 C 817.3494,-237.00958 816.98631,-237.05438 816.54419,-237.07544 C 817.71368,-237.87299 821.40721,-239.13166 824.04419,-240.01294 C 826.942,-240.98141 827.2772,-241.01626 829.26294,-241.29419 C 831.31259,-241.58108 832.45044,-241.60669 832.45044,-241.60669 C 832.45042,-241.60669 832.37769,-242.21366 833.45044,-242.66919 C 834.17004,-242.97476 838.44142,-244.16994 840.51294,-244.66919 C 842.51538,-245.15177 845.71143,-245.59748 847.57544,-245.07544 C 849.48622,-244.54029 855.29419,-242.57544 855.29419,-242.57544 C 855.29419,-242.57544 848.87153,-244.99895 848.04419,-245.29419 C 847.84635,-245.3648 847.46203,-245.43458 847.01294,-245.48169 C 848.20084,-246.21034 851.92821,-247.25577 854.60669,-247.98169 C 857.55011,-248.77944 857.89877,-248.75252 859.91919,-248.88794 C 861.83208,-249.01617 862.86624,-248.95903 863.04419,-248.95044 C 863.16147,-249.1541 863.43595,-249.72992 863.98169,-249.91919 C 864.71388,-250.17313 869.06021,-250.96708 871.16919,-251.29419 C 872.91732,-251.5653 875.57007,-251.77889 877.48169,-251.38794 C 877.79935,-251.32298 878.08691,-251.20243 878.35669,-251.10669 C 880.29743,-250.41796 886.20044,-248.01294 886.20044,-248.01294 C 886.20045,-248.01294 879.66573,-250.96371 878.82544,-251.32544 C 878.62447,-251.41195 878.25031,-251.49223 877.79419,-251.57544 C 879.00069,-252.20862 882.82375,-252.97104 885.54419,-253.48169 C 888.53372,-254.04288 888.84442,-254.01123 890.88794,-254.01294 C 892.9972,-254.01471 894.20044,-253.88794 894.20044,-253.88794 C 894.20044,-253.88793 894.12773,-254.51913 895.23169,-254.82544 C 895.97221,-255.03091 900.35781,-255.65931 902.48169,-255.88794 C 904.53471,-256.10893 907.80032,-256.14016 909.70044,-255.41919 C 911.64823,-254.68012 917.54419,-252.04419 917.54419,-252.04419 C 917.54421,-252.04419 910.98131,-255.22316 910.13794,-255.60669 C 909.93626,-255.69842 909.59573,-255.7929 909.13794,-255.88794 C 910.34886,-256.48982 914.12236,-257.13678 916.85669,-257.54419 C 919.86149,-257.99191 920.1822,-257.99589 922.23169,-257.95044 C 924.17214,-257.90742 925.23868,-257.75621 925.41919,-257.73169 C 925.53811,-257.92485 925.80309,-258.49752 926.35669,-258.63794 C 927.0994,-258.82632 931.51098,-259.37222 933.63794,-259.57544 C 935.40097,-259.74386 938.05803,-259.80973 939.98169,-259.32544 C 940.30137,-259.24496 940.5852,-259.12185 940.85669,-259.01294 C 942.80962,-258.22945 948.76294,-255.54419 948.76294,-255.54419 C 948.76292,-255.54419 942.17103,-258.79767 941.32544,-259.20044 C 941.12322,-259.29676 940.75318,-259.40747 940.29419,-259.51294 C 941.50833,-260.08721 945.33785,-260.63513 948.07544,-261.01294 C 951.08382,-261.42814 951.39851,-261.45557 953.45044,-261.38794 C 955.56842,-261.31813 956.76294,-261.13794 956.76294,-261.13794 C 956.76292,-261.13794 956.68569,-261.77535 957.79419,-262.04419 C 958.53781,-262.22454 962.94595,-262.70774 965.07544,-262.88794 C 967.13391,-263.06211 970.41868,-263.01226 972.32544,-262.23169 C 974.28003,-261.43153 980.20044,-258.70044 980.20044,-258.70044 C 980.20042,-258.70044 973.64051,-262.0092 972.79419,-262.41919 C 972.59182,-262.51724 972.22233,-262.62229 971.76294,-262.73169 C 972.97811,-263.29559 976.77302,-263.84599 979.51294,-264.20044 C 982.52385,-264.58996 982.83425,-264.59809 984.88794,-264.51294 C 986.83233,-264.43234 987.89457,-264.2597 988.07544,-264.23169 C 988.1946,-264.42255 988.45821,-264.977 989.01294,-265.10669 C 989.7572,-265.28069 994.16287,-265.75716 996.29419,-265.91919 C 998.06081,-266.05346 1000.7439,-266.0449 1002.6692,-265.54419 C 1002.9892,-265.46098 1003.2725,-265.34292 1003.5442,-265.23169 C 1005.4988,-264.43153 1011.4505,-261.66919 1011.4505,-261.66919 C 1011.4504,-261.66919 1004.8593,-265.0092 1004.013,-265.41919 C 1003.8106,-265.51724 1003.4411,-265.6223 1002.9817,-265.73169 C 1004.1968,-266.29559 1008.023,-266.81475 1010.763,-267.16919 C 1013.7739,-267.55872 1014.1155,-267.59809 1016.1692,-267.51294 C 1018.2889,-267.42506 1019.4817,-267.20044 1019.4817,-267.20044 C 1019.4817,-267.20044 1019.4033,-267.84946 1020.513,-268.10669 C 1021.2573,-268.27925 1025.6625,-268.73005 1027.7942,-268.88794 C 1029.8548,-269.04054 1033.1371,-268.98471 1035.0442,-268.20044 C 1036.9992,-267.39649 1042.9192,-264.70044 1042.9192,-264.70044 C 1042.9192,-264.70044 1036.3594,-267.97631 1035.513,-268.38794 C 1035.3105,-268.48638 1034.9412,-268.59016 1034.4817,-268.70044 C 1035.6971,-269.26198 1039.4936,-269.82822 1042.2317,-270.20044 C 1045.2407,-270.60949 1045.5544,-270.61602 1047.6067,-270.54419 C 1049.5498,-270.4762 1050.6139,-270.37934 1050.7942,-270.35669 C 1050.913,-270.55109 1051.1788,-271.0855 1051.7317,-271.23169 C 1052.4735,-271.42781 1056.8628,-272.06047 1058.9817,-272.32544 C 1060.7381,-272.54505 1063.387,-272.65775 1065.2942,-272.29419 C 1065.6111,-272.23378 1065.9,-272.10481 1066.1692,-272.01294 C 1068.1054,-271.35202 1074.013,-269.07544 1074.013,-269.07544 C 1074.0129,-269.07544 1067.4763,-271.88199 1066.638,-272.23169 C 1066.4375,-272.31532 1066.0618,-272.40502 1065.6067,-272.48169 C 1066.8104,-273.13215 1070.6258,-273.85364 1073.3255,-274.48169 C 1076.2922,-275.17189 1076.6144,-275.23676 1078.638,-275.35669 C 1080.7266,-275.48049 1081.9192,-275.38794 1081.9192,-275.38794 C 1081.9192,-275.38793 1081.8322,-276.01999 1082.9192,-276.41919 C 1083.6484,-276.68699 1087.9664,-277.75716 1090.0442,-278.23169 C 1092.0527,-278.69038 1095.2121,-279.26099 1097.0442,-278.85669 C 1098.9223,-278.44223 1110.6224,-275.84106 1110.6224,-275.84106 C 1110.6224,-275.84106 1098.2949,-278.86372 1097.4817,-279.10669 C 1097.2872,-279.16481 1096.9231,-279.21295 1096.4817,-279.23169 C 1097.6493,-280.03538 1099.9959,-280.91899 1102.5911,-281.93481 C 1104.2725,-282.59299 1103.5148,-282.3114 1105.367,-282.93841 C 1107.1206,-283.53207 1107.8524,-283.94912 1107.9974,-284.0514 C 1108.3435,-284.25791 1107.6414,-284.17328 1107.4532,-284.0938 z"
2631 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7333);enable-background:new"
2632 sodipodi:nodetypes="czscsssscssssscsssscssssscsssscssssscsssscssssscsssscssssscsssscssccsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscsssscscsscssscscsscc" />
2633 <path
2634 id="path8175"
2635 d="M 1082.625,-275.125 C 1084.498,-274.73152 1087.1211,-273.97945 1088.6563,-273.15625 C 1090.1915,-272.33306 1091.4785,-272.10025 1094.0313,-270.65625 C 1096.5579,-269.22699 1098.8271,-268.64929 1101,-268.125 C 1103.3476,-267.55858 1106.4354,-267.40977 1109.8438,-266.9375 C 1108.7549,-267.77725 1103.2364,-268.10995 1101.4375,-268.5 C 1099.6386,-268.89006 1097.5434,-269.51616 1094.8438,-270.8125 C 1092.1441,-272.10884 1091.3494,-272.61146 1089.0313,-273.5 C 1086.7131,-274.38854 1085.0269,-274.88314 1082.625,-275.125 z"
2636 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7285);enable-background:new" />
2637 <path
2638 id="path8177"
2639 d="M 1051.4688,-270 C 1053.3741,-269.42241 1055.9969,-268.38428 1057.5625,-267.40625 C 1059.1281,-266.42823 1060.4427,-266.04644 1063.0625,-264.28125 C 1065.6555,-262.53409 1068.0484,-261.57198 1070.3125,-260.6875 C 1072.7586,-259.73193 1075.9951,-259.03037 1079.7188,-257.625 C 1078.5292,-258.76284 1072.6557,-260.31175 1070.7813,-261 C 1068.9068,-261.68825 1066.6995,-262.5662 1063.9063,-264.28125 C 1061.113,-265.99629 1060.3327,-266.56515 1057.9688,-267.6875 C 1055.6047,-268.80984 1053.9121,-269.52205 1051.4688,-270 z"
2640 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7289);enable-background:new" />
2641 <path
2642 id="path8179"
2643 d="M 1020.2188,-266.84375 C 1022.1307,-266.20564 1024.8,-265.08839 1026.375,-264.03125 C 1027.9501,-262.9741 1029.2706,-262.52258 1031.9063,-260.625 C 1034.5149,-258.74679 1036.9347,-257.59497 1039.2188,-256.5625 C 1041.6865,-255.44705 1044.9833,-254.3892 1048.75,-252.71875 C 1047.5467,-253.94128 1041.5472,-256.03298 1039.6563,-256.84375 C 1037.7653,-257.65452 1035.5914,-258.73754 1032.7813,-260.59375 C 1029.9711,-262.44995 1029.1595,-263.07068 1026.7813,-264.3125 C 1024.403,-265.5543 1022.6706,-266.28819 1020.2188,-266.84375 z"
2644 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7293);enable-background:new" />
2645 <path
2646 id="path8181"
2647 d="M 1110.1719,-266.89063 C 1110.3227,-266.84207 1110.8599,-266.25963 1110.2813,-265.40625 C 1109.4712,-264.21166 1104.5764,-262.08196 1101.7188,-261.28125 C 1098.8739,-260.48413 1095.4287,-260.30351 1091.1563,-261.65625 C 1086.8547,-263.0182 1085.6866,-264.12497 1080.5,-265.96875 C 1085.164,-263.85358 1086.6953,-262.01642 1090.625,-260.625 C 1092.2457,-260.05113 1093.9921,-259.6854 1095.6875,-259.59375 C 1095.2424,-259.26812 1094.1572,-258.61045 1092.125,-258 C 1089.3295,-257.16031 1085.4759,-256.46622 1083.875,-256.375 C 1082.3604,-256.28868 1080.733,-256.88749 1080.4375,-257 C 1080.6042,-256.89692 1080.8107,-256.62266 1080.1875,-255.96875 C 1079.2882,-255.02512 1074.0401,-254.04575 1071.0625,-253.71875 C 1068.0982,-253.3932 1064.5409,-253.73471 1060.1563,-255.625 C 1056.1783,-257.33997 1054.8173,-258.54036 1050.75,-260.375 C 1050.75,-260.375 1050.75,-260.21875 1050.75,-260.21875 C 1054.3931,-258.12346 1056.034,-256.33548 1059.625,-254.65625 C 1061.3552,-253.84716 1063.2167,-253.24749 1065.0313,-252.9375 C 1064.4964,-252.65074 1063.4735,-252.22599 1061.5938,-251.90625 C 1058.7248,-251.41829 1054.7848,-251.09011 1053.1563,-251.15625 C 1052.3056,-251.19079 1051.4277,-251.34062 1050.75,-251.5625 C 1050.0652,-251.77738 1049.5603,-252.00717 1049.4375,-252.0625 C 1049.6069,-251.95529 1049.8686,-251.65962 1049.2188,-251.03125 C 1048.3091,-250.15163 1042.9727,-249.69487 1039.9688,-249.5625 C 1036.9783,-249.43071 1033.3799,-250.01313 1028.9688,-252.125 C 1024.5276,-254.25126 1023.3273,-255.5266 1018.0625,-257.90625 C 1022.7968,-255.30921 1024.349,-253.27715 1028.4063,-251.1875 C 1030.0796,-250.32565 1031.8915,-249.69325 1033.6563,-249.25 C 1033.193,-249.01668 1032.0669,-248.56186 1029.9688,-248.3125 C 1027.0825,-247.96952 1023.1342,-247.81962 1021.5,-247.9375 C 1019.9538,-248.049 1018.2688,-248.79446 1017.9688,-248.9375 C 1018.1379,-248.81721 1018.3826,-248.52702 1017.75,-247.9375 C 1016.8372,-247.08677 1011.5059,-246.67538 1008.5,-246.5625 C 1005.5075,-246.45013 1001.9103,-247.05293 997.5,-249.15625 C 993.49875,-251.06448 992.11197,-252.29408 988.03125,-254.25 C 988.03122,-254.25 988.03125,-254.09375 988.03125,-254.09375 C 991.68631,-251.88983 993.32546,-250.0412 996.9375,-248.1875 C 998.67779,-247.29435 1000.5745,-246.65923 1002.4063,-246.21875 C 1001.8663,-245.97045 1000.8282,-245.60342 998.9375,-245.375 C 996.05182,-245.02642 992.07145,-244.85405 990.4375,-244.96875 C 989.58405,-245.02865 988.71119,-245.22666 988.03125,-245.46875 C 987.34415,-245.70405 986.8419,-245.94101 986.71875,-246 C 986.88873,-245.88773 987.18323,-245.57775 986.53125,-244.96875 C 985.6186,-244.11625 980.25592,-243.67538 977.25,-243.5625 C 974.25754,-243.45013 970.65654,-244.09055 966.25,-246.15625 C 961.81347,-248.23603 960.60312,-249.48796 955.34375,-251.8125 C 960.07313,-249.26501 961.63449,-247.2347 965.6875,-245.1875 C 967.35905,-244.34317 969.17304,-243.72107 970.9375,-243.28125 C 970.47427,-243.04703 969.3478,-242.59718 967.25,-242.34375 C 964.36431,-241.99517 960.4138,-241.77423 958.78125,-241.875 C 957.23669,-241.97032 955.58094,-242.70385 955.28125,-242.84375 C 955.45024,-242.72522 955.66317,-242.4399 955.03125,-241.84375 C 954.11939,-240.98347 948.7846,-240.5135 945.78125,-240.375 C 942.7913,-240.2371 939.2138,-240.82568 934.8125,-242.84375 C 930.81942,-244.67464 929.44739,-245.87295 925.375,-247.75 C 925.37498,-247.75 925.375,-247.59375 925.375,-247.59375 C 929.02261,-245.46048 930.64533,-243.65888 934.25,-241.875 C 935.98675,-241.01549 937.85727,-240.42486 939.6875,-240 C 939.14803,-239.7471 938.13687,-239.35871 936.25,-239.09375 C 933.37022,-238.68939 929.41187,-238.44813 927.78125,-238.53125 C 926.92953,-238.57466 926.05355,-238.7398 925.375,-238.96875 C 924.68931,-239.19076 924.1854,-239.41214 924.0625,-239.46875 C 924.23209,-239.35976 924.4944,-239.0591 923.84375,-238.4375 C 922.93296,-237.56736 917.59354,-237.04598 914.59375,-236.875 C 911.60742,-236.70479 908.01994,-237.19077 903.625,-239.15625 C 899.20011,-241.13513 898.01904,-242.38444 892.78125,-244.53125 C 897.49122,-242.14358 899.05142,-240.14252 903.09375,-238.1875 C 904.7609,-237.38119 906.55418,-236.79092 908.3125,-236.40625 C 907.85087,-236.15755 906.7155,-235.694 904.625,-235.375 C 901.7494,-234.93624 897.8446,-234.6419 896.21875,-234.6875 C 894.68052,-234.73062 892.98595,-235.43272 892.6875,-235.5625 C 892.85583,-235.44968 893.09807,-235.14875 892.46875,-234.53125 C 891.56063,-233.64015 886.2658,-233.003 883.28125,-232.71875 C 880.31007,-232.43577 876.70783,-232.89455 872.34375,-234.65625 C 868.38441,-236.25456 867.0146,-237.45112 863,-238.96875 C 863.00003,-238.96875 863,-238.8125 863,-238.8125 C 866.5959,-237.00115 868.23831,-235.23017 871.8125,-233.65625 C 873.53457,-232.8979 875.39998,-232.3673 877.21875,-232.03125 C 876.68266,-231.75217 875.65217,-231.34362 873.78125,-230.96875 C 870.92586,-230.39665 866.99183,-229.94936 865.375,-229.9375 C 864.53049,-229.93129 863.66892,-230.01844 863,-230.1875 C 862.32409,-230.34901 861.83991,-230.51673 861.71875,-230.5625 C 861.88597,-230.46848 862.14142,-230.17902 861.5,-229.5 C 860.60213,-228.54948 855.31352,-227.58292 852.375,-227.0625 C 849.44966,-226.54441 845.94285,-226.68826 841.65625,-228.09375 C 837.34045,-229.50882 836.18348,-230.62369 831.09375,-232.0625 C 835.6706,-230.31149 837.1823,-228.50244 841.125,-227.0625 C 842.75108,-226.46861 844.49385,-226.10685 846.21875,-225.90625 C 845.7659,-225.60923 844.66397,-225.02286 842.625,-224.4375 C 839.82028,-223.63233 835.98614,-222.86167 834.40625,-222.6875 C 832.9115,-222.5227 831.29002,-223.00431 831,-223.09375 C 831.16356,-223.00368 831.39278,-222.73382 830.78125,-222.03125 C 829.89878,-221.0174 824.73673,-219.6596 821.84375,-218.96875 C 818.96373,-218.28097 815.50815,-218.20873 811.28125,-219.40625 C 807.4464,-220.4927 806.10867,-221.47862 802.21875,-222.53125 C 802.21874,-222.53125 802.21875,-222.375 802.21875,-222.375 C 805.70293,-220.98015 807.28816,-219.4556 810.75,-218.34375 C 812.41793,-217.80803 814.20578,-217.55701 815.96875,-217.46875 C 815.44911,-217.11663 814.46836,-216.55423 812.65625,-215.9375 C 809.89059,-214.99625 806.06601,-214.00213 804.5,-213.78125 C 803.68206,-213.66586 802.8669,-213.65842 802.21875,-213.75 C 801.56379,-213.83321 801.08615,-213.96827 800.96875,-214 C 801.13079,-213.92536 801.40274,-213.65956 800.78125,-212.90625 C 799.91125,-211.85172 794.77162,-210.247 791.90625,-209.46875 C 789.05372,-208.69399 785.64713,-208.51055 781.46875,-209.5625 C 777.26192,-210.62163 776.11206,-211.60416 771.125,-212.71875 C 775.60954,-211.25929 777.09435,-209.58352 780.9375,-208.46875 C 782.52254,-208.00898 784.22429,-207.8305 785.90625,-207.78125 C 785.46468,-207.44449 784.39374,-206.75352 782.40625,-206 C 779.67232,-204.96351 775.95427,-203.83731 774.40625,-203.5625 C 772.94163,-203.30248 771.34667,-203.67904 771.0625,-203.75 C 771.22275,-203.67035 771.44294,-203.42902 770.84375,-202.6875 C 769.97909,-201.61744 764.92723,-199.86935 762.09375,-199 C 759.27295,-198.13453 755.88625,-197.84369 751.75,-198.78125 C 747.99741,-199.63186 746.70215,-200.49772 742.875,-201.375 C 742.875,-201.375 742.875,-201.21875 742.875,-201.21875 C 746.30296,-199.98096 747.86241,-198.58645 751.25,-197.6875 C 752.88216,-197.25436 754.61704,-197.10449 756.34375,-197.125 C 755.83482,-196.74083 754.867,-196.10318 753.09375,-195.375 C 750.38741,-194.26366 746.65742,-193.06719 745.125,-192.75 C 744.3246,-192.58431 743.51269,-192.53138 742.875,-192.59375 C 742.875,-192.59375 742.875,-192.07823 742.875,-191.67146 C 742.875,-191.40639 742.875,-191.1875 742.875,-191.1875 C 743.10145,-191.33218 743.32391,-191.46011 743.59375,-191.5625 C 744.67427,-191.97248 745.76536,-191.77827 749.59375,-193.25 C 753.42218,-194.72174 754.81787,-195.25498 755.5,-195.65625 C 756.1796,-196.05603 757.11165,-196.53562 757.71875,-197.1875 C 759.5456,-197.32525 761.2895,-197.68073 762.65625,-198.1875 C 765.62437,-199.28802 767.53162,-199.99369 769.4375,-200.65625 C 771.34336,-201.31879 771.79159,-202.07112 772.84375,-202.4375 C 773.9353,-202.81761 775.03886,-202.60288 778.90625,-203.96875 C 782.7737,-205.33461 784.18941,-205.79583 784.875,-206.1875 C 785.57609,-206.58802 786.57581,-207.12048 787.1875,-207.78125 C 789.1583,-207.83591 791.00435,-208.16588 792.46875,-208.65625 C 795.47023,-209.66133 797.3949,-210.27796 799.3125,-210.90625 C 800.8511,-211.41035 801.48652,-211.95302 802.21875,-212.34375 C 802.44891,-212.47806 802.69449,-212.59748 802.96875,-212.6875 C 804.06698,-213.04798 805.1502,-212.76887 809.0625,-214 C 812.97483,-215.23113 814.42855,-215.67295 815.125,-216.03125 C 815.81888,-216.38822 816.75515,-216.82386 817.375,-217.4375 C 819.24021,-217.46016 821.01081,-217.70433 822.40625,-218.125 C 825.43668,-219.03854 827.39863,-219.5551 829.34375,-220.09375 C 831.28886,-220.63239 831.76993,-221.35827 832.84375,-221.65625 C 833.95776,-221.9654 835.06369,-221.64886 839.03125,-222.6875 C 842.99886,-223.72613 844.44883,-224.12023 845.15625,-224.4375 C 845.89112,-224.76709 846.97008,-225.19122 847.59375,-225.8125 C 849.59149,-225.6965 851.45118,-225.83259 852.9375,-226.1875 C 856.01561,-226.9225 858.02094,-227.28844 860,-227.6875 C 861.58792,-228.00768 862.24429,-228.47805 863,-228.78125 C 863.23757,-228.88805 863.46695,-228.97401 863.75,-229.03125 C 864.88347,-229.26044 866.05448,-228.82232 870.09375,-229.53125 C 874.13308,-230.24018 875.594,-230.45834 876.3125,-230.71875 C 877.02836,-230.97819 878.01678,-231.28599 878.65625,-231.8125 C 880.58052,-231.57301 882.40413,-231.58797 883.84375,-231.8125 C 886.97008,-232.30012 888.9983,-232.51317 891,-232.78125 C 893.00171,-233.04932 893.48869,-233.72639 894.59375,-233.875 C 895.74014,-234.02918 896.86967,-233.57343 900.9375,-234.09375 C 905.00534,-234.61407 906.49763,-234.78948 907.21875,-235.03125 C 907.95585,-235.27839 909.01684,-235.61748 909.65625,-236.15625 C 911.70632,-235.82072 913.63003,-235.75829 915.15625,-235.9375 C 918.29856,-236.30646 920.33619,-236.49686 922.34375,-236.71875 C 923.95451,-236.89677 924.60842,-237.32695 925.375,-237.5625 C 925.61594,-237.64802 925.86912,-237.7181 926.15625,-237.75 C 927.30603,-237.87772 928.45754,-237.40335 932.53125,-237.875 C 936.60499,-238.34665 938.09034,-238.4856 938.8125,-238.71875 C 939.53196,-238.95102 940.51274,-239.19221 941.15625,-239.6875 C 943.09262,-239.35404 944.92631,-239.28326 946.375,-239.4375 C 949.52102,-239.77245 951.55256,-239.95609 953.5625,-240.15625 C 955.57246,-240.35639 956.04664,-240.98264 957.15625,-241.09375 C 958.30739,-241.20903 959.45268,-240.72869 963.53125,-241.15625 C 967.60986,-241.58381 969.12011,-241.71834 969.84375,-241.9375 C 970.5829,-242.16136 971.63947,-242.45075 972.28125,-242.96875 C 974.33835,-242.57008 976.28312,-242.47535 977.8125,-242.625 C 980.96123,-242.9331 982.98834,-243.09825 985,-243.28125 C 986.61407,-243.42807 987.2631,-243.8418 988.03125,-244.0625 C 988.27267,-244.14336 988.52478,-244.19241 988.8125,-244.21875 C 989.96461,-244.3242 991.10546,-243.826 995.1875,-244.21875 C 999.26958,-244.6115 1000.7764,-244.74959 1001.5,-244.96875 C 1002.2209,-245.18708 1003.1997,-245.41645 1003.8438,-245.90625 C 1005.7818,-245.55626 1007.6126,-245.45187 1009.0625,-245.59375 C 1012.2112,-245.90185 1014.2383,-246.067 1016.25,-246.25 C 1018.2616,-246.43299 1018.7642,-247.08802 1019.875,-247.1875 C 1021.0273,-247.29073 1022.1672,-246.80267 1026.25,-247.1875 C 1030.3329,-247.57232 1031.8387,-247.6885 1032.5625,-247.90625 C 1033.3018,-248.12868 1034.3581,-248.42074 1035,-248.9375 C 1037.0574,-248.53573 1039.0029,-248.43417 1040.5313,-248.59375 C 1043.6779,-248.92227 1045.7084,-249.11645 1047.7188,-249.3125 C 1049.3318,-249.46979 1049.9844,-249.94398 1050.75,-250.1875 C 1050.9907,-250.27554 1051.2132,-250.30887 1051.5,-250.34375 C 1052.6483,-250.48345 1053.8167,-250.00384 1057.875,-250.59375 C 1061.9333,-251.18367 1063.4368,-251.37089 1064.1563,-251.625 C 1064.873,-251.87816 1065.8308,-252.18307 1066.4688,-252.71875 C 1068.3885,-252.50681 1070.1887,-252.56734 1071.625,-252.8125 C 1074.7441,-253.3449 1076.7366,-253.74111 1078.7188,-254.125 C 1080.7009,-254.50887 1081.1931,-255.16465 1082.2813,-255.40625 C 1083.4101,-255.65691 1084.5516,-255.28996 1088.5313,-256.28125 C 1092.5109,-257.27253 1093.9609,-257.70055 1094.6563,-258.0625 C 1095.3786,-258.43851 1096.4182,-258.93308 1097.0313,-259.59375 C 1098.9943,-259.6058 1100.825,-259.8848 1102.25,-260.4375 C 1105.2012,-261.58211 1107.1232,-262.30692 1108.9375,-263.1875 C 1110.3932,-263.89403 1111.2723,-264.87391 1111.4844,-265.17188 C 1111.6966,-265.46984 1111.5962,-265.91718 1111.6223,-265.93863 C 1111.6652,-265.97387 1111.9416,-266.0236 1112.1013,-266.36707 C 1112.9602,-268.21415 1114.4223,-272.01166 1114.5365,-272.69652 C 1114.6502,-273.37868 1114.7003,-274.04426 1114.751,-274.44149 C 1114.7804,-274.67101 1114.6043,-275.30693 1114.6264,-275.36553 C 1114.6573,-275.44759 1114.9309,-275.63081 1114.9863,-275.88024 C 1115.2526,-277.07857 1115.0752,-278.07153 1114.8612,-279.48917 C 1114.6472,-280.90681 1113.8775,-284.11131 1113.2243,-284.96543 C 1112.5654,-285.82715 1112.0014,-285.9766 1111.4764,-285.96609 C 1111.2678,-285.69633 1111.6132,-285.703 1111.639,-285.65348 C 1112.3196,-285.60269 1112.573,-285.28484 1113.0582,-284.75686 C 1113.5434,-284.22888 1114.28,-280.90569 1114.4166,-279.4553 C 1114.5532,-278.00491 1114.6066,-276.5951 1114.3286,-275.98666 C 1114.0505,-275.37821 1113.6054,-275.46963 1113.313,-275.40375 C 1113.844,-275.21786 1113.9828,-275.27892 1114.0444,-274.43446 C 1114.1037,-273.62108 1113.9112,-272.79477 1113.5246,-271.62884 C 1113.1334,-270.44883 1111.6794,-267.27886 1111.2389,-267.03007 C 1110.7866,-266.77456 1110.5075,-266.75969 1110.1719,-266.89063 z"
2648 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7337);enable-background:new"
2649 sodipodi:nodetypes="cssscscsscsssccscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssscscssssssscscsscsssccscsscscssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsssssscssssscsszsszssszzcczzzczzzc" />
2650 <path
2651 id="path8183"
2652 d="M 988.75,-263.84375 C 990.66161,-263.20935 993.30027,-262.08534 994.875,-261.03125 C 996.44977,-259.97716 997.7711,-259.54873 1000.4063,-257.65625 C 1003.0145,-255.78311 1005.4332,-254.64103 1007.7188,-253.59375 C 1010.1881,-252.46228 1013.4709,-251.43901 1017.25,-249.65625 C 1016.0428,-250.91465 1010.111,-253.0207 1008.2188,-253.84375 C 1006.3266,-254.66679 1004.0908,-255.77424 1001.2813,-257.625 C 998.47169,-259.47575 997.65906,-260.10654 995.28125,-261.34375 C 992.90343,-262.58094 991.20137,-263.29295 988.75,-263.84375 z"
2653 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7297);enable-background:new" />
2654 <path
2655 id="path8185"
2656 d="M 957.5,-260.78125 C 959.41,-260.16315 962.08288,-259.07191 963.65625,-258.03125 C 965.22964,-256.99059 966.55233,-256.54873 969.1875,-254.65625 C 971.79573,-252.7831 974.21442,-251.64104 976.5,-250.59375 C 978.96931,-249.46228 982.25213,-248.439 986.03125,-246.65625 C 984.82397,-247.91465 978.82971,-250.05195 976.9375,-250.875 C 975.04533,-251.69804 972.84084,-252.8055 970.03125,-254.65625 C 967.22167,-256.507 966.4383,-257.09557 964.0625,-258.3125 C 961.68672,-259.52941 959.94929,-260.25135 957.5,-260.78125 z"
2657 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7301);enable-background:new" />
2658 <path
2659 id="path8187"
2660 d="M 926.09375,-257.375 C 928.00147,-256.77755 930.64723,-255.71116 932.21875,-254.6875 C 933.79025,-253.66385 935.08897,-253.24779 937.71875,-251.40625 C 940.32166,-249.58352 942.74762,-248.43405 945.03125,-247.40625 C 947.49845,-246.29584 950.7866,-245.31302 954.5625,-243.5625 C 953.35627,-244.8106 947.3906,-246.88059 945.5,-247.6875 C 943.60942,-248.4944 941.39758,-249.57854 938.59375,-251.375 C 935.7899,-253.17144 934.96671,-253.77751 932.59375,-254.96875 C 930.22078,-256.15999 928.54013,-256.87158 926.09375,-257.375 z"
2661 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7305);enable-background:new" />
2662 <path
2663 id="path8189"
2664 d="M 894.90625,-253.5625 C 896.80838,-253.00895 899.49326,-251.97363 901.0625,-250.96875 C 902.63173,-249.96388 903.93651,-249.56011 906.5625,-247.75 C 909.16162,-245.95836 911.56284,-244.87811 913.84375,-243.875 C 916.30803,-242.79126 919.60359,-241.83471 923.375,-240.125 C 922.1702,-241.36007 916.20084,-243.36978 914.3125,-244.15625 C 912.42418,-244.94272 910.2373,-245.98705 907.4375,-247.75 C 904.63773,-249.51294 903.83831,-250.11836 901.46875,-251.28125 C 899.09918,-252.44413 897.3455,-253.11537 894.90625,-253.5625 z"
2665 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7309);enable-background:new" />
2666 <path
2667 id="path8191"
2668 d="M 863.71875,-248.65625 C 865.59937,-248.22716 868.22302,-247.27587 869.78125,-246.34375 C 871.33948,-245.41164 872.63358,-245.08599 875.25,-243.34375 C 877.83971,-241.61931 880.23067,-240.63573 882.5,-239.71875 C 884.95176,-238.72806 888.23959,-237.84168 892,-236.21875 C 890.79869,-237.42609 884.84751,-239.28484 882.96875,-240 C 881.09,-240.71517 878.88335,-241.68442 876.09375,-243.375 C 873.30412,-245.06557 872.50914,-245.60322 870.15625,-246.65625 C 867.80333,-247.70926 866.13041,-248.36873 863.71875,-248.65625 z"
2669 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7313);enable-background:new" />
2670 <path
2671 id="path8193"
2672 d="M 833.15625,-241.375 C 835.00461,-241.07856 837.6257,-240.39868 839.15625,-239.59375 C 840.68683,-238.78882 841.96999,-238.53802 844.53125,-237.0625 C 847.06629,-235.60204 849.42193,-234.73741 851.65625,-234 C 854.07024,-233.20332 857.31336,-232.53311 861.03125,-231.15625 C 859.84354,-232.28498 853.94353,-233.746 852.09375,-234.3125 C 850.24398,-234.879 848.09033,-235.68642 845.34375,-237.15625 C 842.59718,-238.62608 841.84239,-239.07653 839.53125,-239.9375 C 837.2201,-240.79845 835.52654,-241.25759 833.15625,-241.375 z"
2673 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7317);enable-background:new" />
2674 <path
2675 id="path8195"
2676 d="M 802.90625,-232.3125 C 804.72845,-232.10123 807.27201,-231.51193 808.78125,-230.78125 C 810.2905,-230.05059 811.53693,-229.85127 814.0625,-228.5 C 816.56226,-227.16254 818.89404,-226.45157 821.09375,-225.84375 C 823.47028,-225.18708 826.65839,-224.77087 830.3125,-223.65625 C 829.14515,-224.70121 823.38362,-225.75954 821.5625,-226.21875 C 819.74139,-226.67796 817.61025,-227.34571 814.90625,-228.65625 C 812.20222,-229.96677 811.43519,-230.37615 809.15625,-231.125 C 806.8773,-231.87383 805.243,-232.30431 802.90625,-232.3125 z"
2677 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7321);enable-background:new" />
2678 <path
2679 id="path8197"
2680 d="M 773.1875,-222.1875 C 774.99859,-222.0088 777.50809,-221.52244 779,-220.84375 C 780.49194,-220.16506 781.7534,-220.04553 784.25,-218.78125 C 786.72107,-217.52987 789.04005,-216.88511 791.21875,-216.34375 C 793.57262,-215.75887 796.71009,-215.44623 800.3125,-214.5 C 799.16166,-215.49116 793.45999,-216.2833 791.65625,-216.6875 C 789.85253,-217.0917 787.74072,-217.70866 785.0625,-218.9375 C 782.38432,-220.16634 781.65905,-220.54839 779.40625,-221.21875 C 777.15346,-221.88909 775.50998,-222.22107 773.1875,-222.1875 z"
2681 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7329);enable-background:new" />
2682 <path
2683 id="path8199"
2684 d="M 743.5625,-211.1875 C 745.35531,-211.05839 747.83563,-210.63785 749.3125,-210 C 750.7894,-209.36215 752.0286,-209.25844 754.5,-208.0625 C 756.94618,-206.87878 759.22054,-206.31584 761.375,-205.84375 C 763.70267,-205.33372 766.7946,-205.16311 770.375,-204.28125 C 769.23121,-205.25185 763.62741,-205.8719 761.84375,-206.21875 C 760.06008,-206.56559 757.9609,-207.10631 755.3125,-208.25 C 752.66409,-209.39368 751.91755,-209.76631 749.6875,-210.375 C 747.45742,-210.98368 745.86156,-211.28466 743.5625,-211.1875 z"
2685 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;filter:url(#filter7325);enable-background:new" />
2686 </g>
2687 </g>
2688 <path
2689 style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2690 d="M 863.87812,475.6679 C 865.52024,472.4499 867.39593,469.93261 868.73948,465.81892 C 869.5382,462.16103 872.05152,463.78819 875.99995,457.42202 C 877.40188,455.18252 881.47648,457.81338 884.96505,455.02291 C 886.23577,454.21972 887.84993,454.6186 889.44761,454.95978 C 893.213,456.27874 895.27337,458.66333 897.78137,460.76815 C 903.92043,466.73838 918.31551,468.71142 921.26741,467.08161 C 922.70146,464.17687 929.14869,461.67273 933.64178,455.96993 C 934.38989,454.84726 945.37114,447.22547 948.28899,449.40394"
2691 id="path8201"
2692 sodipodi:nodetypes="ccccccccc" />
2693 <path
2694 style="opacity:1;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2695 d="M 888.50059,465.25071 C 895.864,462.01774 902.31149,456.34231 909.20872,451.86619 C 912.51929,449.89665 916.07855,455.0822 920.00472,455.46485 C 922.30245,455.24672 923.71762,456.66744 925.68683,457.10635 C 930.84319,458.42414 928.08476,460.97123 935.66209,463.54607 C 941.8177,465.26647 944.56949,456.7476 950.56184,456.22247 C 955.43923,455.71948 958.66076,455.90644 962.17859,455.96993 C 966.10555,456.10882 966.25714,452.47233 968.23951,450.66663 C 971.22007,447.86141 975.39512,448.81691 978.38436,445.92573 C 979.4019,444.54105 980.33894,442.91488 981.11895,440.81764 C 982.00096,438.8173 984.15901,441.12362 985.91718,442.08033"
2696 id="path8203"
2697 sodipodi:nodetypes="ccccccccccc" />
2698 </g>
2699 <g
2700 inkscape:groupmode="layer"
2701 id="layer15"
2702 inkscape:label="Feet"
2703 style="display:inline">
2704 <path
2705 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9048);enable-background:accumulate"
2706 d="M 403.27922,1056.3058 L 459.84776,1013.8794 L 531.97265,1028.0215 L 485.30361,1080.3474 L 431.56349,1087.4185 L 403.27922,1056.3058 z"
2707 id="path8994" />
2708 <path
2709 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2710 d="M 542.27183,1060.5719 C 540.86456,1079.3731 541.12693,1093.3229 544.35357,1109.8752 C 547.58023,1126.4275 560.75966,1155.7825 564.68798,1173.0589 C 568.61419,1190.326 567.38211,1211.3686 552.22854,1224.2072 C 536.91093,1237.1846 510.17726,1245.8061 484.39623,1239.9409 C 458.61518,1234.0757 414.84716,1190.7175 395.80604,1169.7126 C 376.6939,1148.6293 332.04518,1075.862 317.86751,1045.4368 C 303.68984,1015.0117 305.2079,1008.7182 309.74779,999.90708 C 300.38107,975.38658 297.33408,949.84027 276.03534,924.33044 C 306.36081,927.44488 319.91562,951.28677 336.16102,971.47019 C 330.63113,923.39416 318.10631,907.05369 307.78707,880.74589 C 337.78137,886.82754 358.36643,912.61828 371.76686,953.45839 C 381.32101,949.54048 390.00462,944.08545 401.95427,944.39719 C 390.65677,902.70139 384.00481,874.48135 365.26702,843.32725 C 418.70898,848.99758 448.92404,923.96657 444.23844,931.28805 C 454.21641,929.04406 463.24409,924.75767 474.67497,925.63638 C 463.426,887.28936 453.62716,848.76848 471.01526,806.98819 C 471.01526,806.98819 519.30204,872.42507 525.40492,892.79397 C 531.50779,913.16287 526.92373,931.49448 526.92373,931.49448 C 526.92373,931.49448 543.8833,962.57978 547.21765,982.58862 C 550.59075,1002.83 543.68496,1041.6919 542.27183,1060.5719 z"
2711 id="path4189"
2712 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
2713 <path
2714 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3587);enable-background:accumulate"
2715 d="M 719.5,738.69519 L 737.81177,754.12715 L 782.2228,738.73894 L 805.5,713.19519 L 816.96397,732.41584 L 847.63558,745.19938 L 872.73295,750.92775 L 892,723.19519 L 908.02309,747.02126 L 947,752.19519 L 957.24541,745.99667 L 964.00012,754.69487 L 989.5,765.69519 L 991.5,725.19519 L 955.94866,710.6576 L 923.45591,689.1305 L 883.0038,677.66492 L 861.69668,662.13148 L 840,685.19519 L 755.02878,638.61208 L 722,676.69519 L 719.5,738.69519 z"
2716 id="path4191"
2717 sodipodi:nodetypes="cccccccccccccccccccccc"
2718 clip-path="url(#clipPath3631)"
2719 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" />
2720 <path
2721 style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter3898);enable-background:new"
2722 d="M 584,696.5 L 577.4375,713.65625 C 577.4375,713.65625 569.62598,734.02113 561.75,757.3125 C 557.81201,768.95818 553.86698,781.35395 550.8125,792.4375 C 547.75802,803.52105 545.47664,812.81736 545.3125,820.71875 C 544.91443,839.88071 551.05903,855.60705 554.25,862.46875 C 553.47847,866.02398 552.25863,871.92307 550.90625,880.5625 C 548.98583,892.83071 547.18798,907.71691 548.53125,920.4375 C 549.91334,933.52585 555.34347,948.62515 561.125,963.46875 C 566.90653,978.31235 573.17935,992.69586 576.34375,1001.5 C 582.97581,1019.9519 586.33671,1033.0763 587.65625,1050 C 588.2376,1057.4561 587.41398,1070.336 586.40625,1083.375 C 585.39852,1096.414 584.21964,1109.6764 584.71875,1120.3438 C 585.70786,1141.4836 594.04673,1167.1785 618.09375,1178.2812 C 640.86858,1188.7966 673.42057,1189.9834 701.53125,1174.8438 C 717.69117,1166.1404 731.60759,1147.7462 744.90625,1127.9375 C 758.20491,1108.1288 769.87542,1086.8841 776.84375,1073.0312 C 792.19667,1042.51 816.23728,957.56702 822.46875,920.3125 C 825.48734,902.26597 826.39041,891.24695 825.09375,882.28125 C 824.11522,875.51521 821.26556,870.13385 818.21875,866.0625 C 820.26149,838.55459 817.48668,814.69372 830.1875,786.65625 L 840.75,763.375 L 816.9375,772.6875 C 799.44775,779.52503 788.03586,791.73286 780.34375,804.75 C 780.02124,805.29577 779.78061,805.85776 779.46875,806.40625 C 779.69078,783.89104 783.87659,768.76866 786.0625,747.71875 L 788.03125,728.71875 L 771,737.375 C 740.40551,752.93071 725.30511,785.56821 721.28125,827.59375 C 717.03593,826.96828 712.44985,826.5741 707.46875,826.75 C 707.17726,787.56964 707.07246,759.71315 716.0625,727.375 L 721.65625,707.25 L 702.21875,714.90625 C 671.30938,727.11019 654.35921,756.83698 645.59375,783.28125 C 641.21102,796.50339 638.84793,809.08246 638,819.21875 C 637.76797,821.99248 637.68894,824.53007 637.6875,826.9375 C 634.44563,826.90109 631.26698,827.07339 627.625,827.4375 C 627.66662,788.43277 624.14076,747.68335 595.34375,710.9375 L 584,696.5 z M 589.8125,740.3125 C 606.61941,770.95633 607.28701,804.27978 606.75,840.0625 L 606.53125,855.125 L 618.56618,848.58579 C 627.22823,845.45277 638.12676,848.35827 650.5,847.75 L 665.17465,857.1066 L 658.84375,831.3125 C 658.7541,831.08253 658.62329,830.89581 658.59375,830.59375 C 658.39424,828.55389 658.37143,825.12068 658.71875,820.96875 C 659.41339,812.66489 661.50832,801.38351 665.34375,789.8125 C 670.49907,774.25956 678.83176,758.62002 690.46875,747.28125 C 685.78494,775.91923 687.25316,807.54059 687.45711,843.08639 L 684.69118,856.34803 L 700.1875,848.75 C 709.2169,845.99229 717.37647,848.40004 729.46875,849.84375 L 742.71507,859.28798 L 741.09375,840 C 742.54168,809.02823 749.31524,786.32192 761.8125,771.125 C 758.82562,790.90384 756.38207,812.9098 762.125,849.46875 L 763.19052,855.84193 L 760.25237,867.35878 L 770.86948,859.1906 L 780.05921,869.41258 L 778.51093,858.94898 L 781.9375,852 C 787.63852,838.78851 792.11032,825.78663 798.28125,815.34375 C 799.24111,813.71941 800.31278,812.27939 801.34375,810.78125 C 797.66309,831.9366 798.91659,850.9894 797.25,868.5625 L 792.56986,876.36948 L 799.96875,876.59375 C 803.1888,880.07736 803.83625,880.44443 804.53125,885.25 C 805.22625,890.05557 804.84987,899.65035 801.96875,916.875 C 796.40076,950.16292 770.12313,994.71481 758.22835,1018.3614 C 751.62344,1031.4918 739.70002,1075.8473 727.105,1094.6079 C 714.50998,1113.3684 698.57363,1134.3752 689.93296,1139.0288 C 668.44244,1150.603 645.37702,1164.5347 629.31407,1157.1183 C 614.93921,1150.4813 606.27438,1135.9256 605.5,1119.375 C 605.11689,1111.187 606.11279,1098.0658 607.125,1084.9688 C 608.13721,1071.8717 618.41391,1062.398 622.54839,1048.4062 C 627.92068,1030.2254 621.10152,1011.8118 610.04839,994.46875 C 603.56184,984.29097 586.07159,970.21085 580.5,955.90625 C 574.92841,941.60165 570.13249,926.9031 569.21875,918.25 C 568.29254,909.47887 569.64125,895.22498 571.4375,883.75 C 573.23375,872.27503 575.28125,863.46875 575.28125,863.46875 L 584.70403,859.85355 L 574.21875,855.96875 C 574.21875,855.96875 565.71986,840.65865 566.125,821.15625 C 566.19611,817.73309 567.96126,808.4282 570.84375,797.96875 C 573.72624,787.5093 577.60841,775.41604 581.46875,764 C 584.51314,754.99692 587.24938,747.39655 589.8125,740.3125 z"
2723 id="path4193"
2724 clip-path="url(#clipPath3677)"
2725 sodipodi:nodetypes="ccssscsssssssssssssccccscccccccccsscccccccccccssscccccccccccccccsccccssssssssssssscccsssc"
2726 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,822.28931,10.93589)" />
2727 <g
2728 id="g3617"
2729 clip-path="url(#clipPath3622)"
2730 transform="translate(276,136)">
2731 <path
2732 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,-52.200498,74.09707)"
2733 id="path4195"
2734 d="M -15.66751,843.48852 L -65.16499,827.93217 L -92.03504,880.25807 L -51.02285,925.51291 L -1.52538,887.32914 L -15.66751,843.48852 z"
2735 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9024);enable-background:accumulate" />
2736 <path
2737 sodipodi:nodetypes="ccccccccccccc"
2738 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,-46.92842,75.511284)"
2739 id="path4197"
2740 d="M 118.70648,859.93048 L 63.552152,813.26144 L 19.711532,850.03099 L 53.652662,903.7711 L 40.055848,989.23313 L 0.61048221,1017.5253 L -40.401718,1028.839 L -43.230138,1075.508 L 13.338402,1100.9639 L 32.282389,1031.3139 L 55.738939,972.45727 L 102.08648,899.84236 L 118.70648,859.93048 z"
2741 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9020);enable-background:accumulate" />
2742 </g>
2743 <path
2744 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9044);enable-background:accumulate"
2745 d="M -70.82184,932.58397 L -10.01066,905.71392 L 90.3985,936.82662 L 26.75889,967.93931 L -55.26549,950.96875 L -70.82184,932.58397 z"
2746 id="path4199"
2747 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,229.07158,211.51128)" />
2748 <path
2749 style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4105);enable-background:new"
2750 d="M 583.0625,715.75 C 570.95641,750.19974 556.348,784.28333 551.3125,820.59375 C 550.48042,835.52242 555.90165,849.75318 560.15625,863.65625 C 554.24001,890.85751 550.01944,920.5562 561.3125,946.78125 C 574.82967,984.9421 596.31397,1022.4634 593.73529,1064.2495 C 592.78699,1093.5437 584.72085,1125.2436 599.125,1152.5312 C 609.32364,1171.866 632.26456,1179.8429 653.09285,1180.1988 C 680.95504,1181.3729 709.55546,1168.5772 725.09375,1144.9375 C 747.68924,1115.5658 766.89426,1083.4402 780.3324,1048.8777 C 797.22293,1003.3717 810.0042,956.31698 818.26642,908.4788 C 820.09082,895.53774 821.3675,881.00895 813.6875,869.65625 C 810.25635,862.31993 813.72957,854.09611 813.00293,846.34648 C 813.67693,821.35182 817.01525,795.68272 829.65625,773.75 C 811.92312,780.1946 794.58357,790.30971 785.65318,807.61425 C 781.7181,814.3238 778.04836,821.18838 774.28125,828 C 770.73126,797.98592 778.00088,768.35172 781.0625,738.71875 C 760.89646,747.77338 744.18578,764.37397 736.88755,785.40075 C 730.58292,800.98078 728.08533,817.71793 726.625,834.4375 C 718.37166,832.91825 709.94053,832.33595 701.5625,832.9375 C 700.59942,794.23963 701.09554,753.53035 712.53125,717.03125 C 693.85012,723.24901 677.36504,735.76676 666.90322,752.41848 C 653.05068,773.29827 645.64182,798.17243 643.84375,823.03125 C 644.42909,827.35579 643.78249,834.87134 637.5,832.90625 C 632.16882,832.9238 626.87092,833.58508 621.5625,834 C 622.71034,794.61852 618.22106,752.3718 594.5,719.78125 C 591.43929,716.14408 588.86315,712.09687 585.875,708.4375 C 584.9375,710.875 584,713.3125 583.0625,715.75 z M 590.8125,729.59375 C 609.37777,758.89004 613.295,794.41387 612.9375,828.46875 C 613.14159,833.64401 612.42094,840.29795 613.0625,844.53125 C 625.38106,838.4285 639.80162,842.09135 652.84375,842.34375 C 655.16087,843.567 656.03585,843.99618 654.75,840.9375 C 650.58545,826.98465 652.90172,812.3245 656.55504,798.52986 C 662.92191,772.23922 677.18332,747.44188 699.375,731.5 C 690.75791,768.73706 693.65842,808.06161 693.28125,845.46875 C 705.53469,838.55885 720.56004,842.02262 733.3125,845.21875 C 736.70472,848.75355 735.60185,844.48927 735.5,841.40625 C 735.01691,820.03567 739.63133,798.33662 749.1875,779.25 C 755.15016,768.56273 763.43088,759.44621 771.625,750.375 C 763.75344,784.2131 762.4221,819.71093 768.90625,853.875 C 770.6311,852.46382 773.51306,853.42086 774.5625,853.5 C 784.24619,832.26318 790.91362,808.11938 809.45266,792.75815 C 811.32595,792.38693 808.00448,801.2831 807.96875,804.65625 C 804.43387,826.50206 800.79359,848.79859 799.18454,870.87536 C 790.40075,873.21707 802.03289,873.1989 802.65329,874.93786 C 810.5764,885.50366 807.31628,899.34258 806.28494,911.2912 C 799.22089,956.32475 784.14263,998.65314 770.33139,1041.971 C 758.25663,1074.9203 742.95719,1100.8235 722.44331,1129.1725 C 711.49074,1142.7239 699.19859,1157.0238 681.59956,1161.6725 C 661.44355,1167.9138 637.3928,1172.5494 619,1161.7188 C 601.71034,1149.3774 597.97607,1126.0099 599.73774,1106.0324 C 599.78653,1090.2062 604.6766,1077.5203 604.14834,1062.5406 C 603.6101,1047.2777 601.85699,1031.9759 597.60573,1015.6743 C 593.35447,999.37268 588.56248,990.75636 581.48667,974.10092 C 574.24556,957.05636 566.41652,937.35229 563.28125,917.8125 C 561.53177,899.18536 566.17296,880.68988 569.0625,862.5625 C 572.35873,859.72554 567.46451,857.36591 566.75,854.375 C 559.14887,837.35992 558.34253,817.6001 564.00766,799.81502 C 571.13786,774.74272 579.76853,750.18261 588.6875,725.6875 C 589.39583,726.98958 590.10417,728.29167 590.8125,729.59375 z"
2751 id="path4201"
2752 sodipodi:nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccczzzcccccc"
2753 clip-path="url(#clipPath4177)"
2754 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,822.28931,10.93589)" />
2755 <path
2756 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4130);enable-background:accumulate"
2757 d="M 735.05635,733.03834 L 737.81177,754.12715 L 782.2228,738.73894 L 787.07343,716.34919 L 783.13726,694.29697 L 760.68563,657.70396 L 752.40559,688.0089 L 735.05635,733.03834 z"
2758 id="path4203"
2759 sodipodi:nodetypes="cccccccc"
2760 clip-path="url(#clipPath3631)"
2761 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" />
2762 <path
2763 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4141);enable-background:accumulate"
2764 d="M 831.81321,730.29452 L 847.63558,745.19938 L 868.49031,748.09932 L 866.90002,708.17334 L 875.22563,677.66492 L 868.06064,671.32386 L 846.36395,692.26626 L 831.81321,730.29452 z"
2765 id="path4205"
2766 sodipodi:nodetypes="cccccccc"
2767 clip-path="url(#clipPath3631)"
2768 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,995.28646,23.53493)" />
2769 <g
2770 id="g8317"
2771 style="filter:url(#filter8333)"
2772 clip-path="url(#clipPath8338)"
2773 transform="translate(276,136)">
2774 <path
2775 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,719.28646,-112.46507)"
2776 clip-path="none"
2777 sodipodi:nodetypes="ccccc"
2778 id="path4209"
2779 d="M 964.00012,754.69487 L 982.42893,762.15966 L 991.5,725.19519 L 976.62969,730.03405 L 964.00012,754.69487 z"
2780 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2781 <rect
2782 y="757.19519"
2783 x="-55"
2784 height="177"
2785 width="182"
2786 id="rect8315"
2787 style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2788 </g>
2789 <g
2790 id="g8346"
2791 style="filter:url(#filter8354)"
2792 clip-path="url(#clipPath8359)"
2793 transform="translate(276,136)">
2794 <path
2795 transform="matrix(-0.9045327,0.2506626,0.2506626,0.9045327,719.28646,-112.46507)"
2796 clip-path="none"
2797 sodipodi:nodetypes="ccccccc"
2798 id="path4207"
2799 d="M 910.14441,746.31415 L 942.75736,751.48808 L 942.39617,727.61189 L 949.5847,697.92968 L 941.13358,692.66603 L 919.31164,719.1768 L 910.14441,746.31415 z"
2800 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2801 <rect
2802 y="696.19519"
2803 x="-22"
2804 height="176"
2805 width="165"
2806 id="rect8344"
2807 style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2808 </g>
2809 </g>
2810 <g
2811 inkscape:groupmode="layer"
2812 id="layer16"
2813 inkscape:label="Left Foot"
2814 style="display:inline">
2815 <path
2816 style="opacity:1;fill:#ada469;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;display:inline;enable-background:new"
2817 d="M 1036.164,1071.8338 C 1042.9581,1090.7366 1046.6577,1105.1335 1048.0543,1123.0457 C 1049.4509,1140.958 1044.2716,1174.8465 1045.1538,1193.7018 C 1046.0356,1212.547 1053.2875,1233.8008 1072.4984,1242.6707 C 1091.9173,1251.6365 1121.8177,1252.882 1146.6183,1239.5251 C 1171.4189,1226.1681 1204.0193,1169.1996 1217.5925,1142.2164 C 1231.2164,1115.1325 1256.3536,1027.719 1262.2533,992.44781 C 1268.1531,957.1766 1264.8039,951.14704 1257.6359,943.39232 C 1260.2762,915.55217 1256.1361,888.45689 1270.7455,856.20614 C 1240.4965,868.03184 1233.3632,896.36684 1222.4266,921.71122 C 1214.4257,870.77829 1222.6358,850.43803 1225.7455,820.49186 C 1196.6808,835.26977 1182.884,867.60588 1180.7455,913.349 C 1169.8216,912.0448 1159.3541,908.91477 1147.1741,912.63471 C 1146.9101,866.61137 1145.7106,835.7453 1156.0847,798.42822 C 1102.8293,819.45508 1093.1375,905.02232 1100.0312,911.20614 C 1089.1484,911.74114 1078.6602,909.90884 1067.1741,914.06329 C 1067.813,871.49194 1066.9136,829.15468 1037.1741,791.20614 C 1037.1741,791.20614 1006.2161,872.12848 1005.7455,894.77757 C 1005.275,917.42666 1015.1971,934.94345 1015.1971,934.94345 C 1015.1971,934.94345 1006.6291,971.68396 1008.8985,993.17568 C 1011.1944,1014.9171 1029.3414,1052.8519 1036.164,1071.8338 z"
2818 id="path8848"
2819 sodipodi:nodetypes="czzzzzzcccccccccczczz" />
2820 <path
2821 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter3587);enable-background:accumulate"
2822 d="M 719.5,738.69519 L 737.81177,754.12715 L 782.2228,738.73894 L 805.5,713.19519 L 816.96397,732.41584 L 847.63558,745.19938 L 872.73295,750.92775 L 892,723.19519 L 908.02309,747.02126 L 947,752.19519 L 957.24541,745.99667 L 964.00012,754.69487 L 989.5,765.69519 L 991.5,725.19519 L 955.94866,710.6576 L 923.45591,689.1305 L 883.0038,677.66492 L 861.69668,662.13148 L 840,685.19519 L 755.02878,638.61208 L 722,676.69519 L 719.5,738.69519 z"
2823 id="path3635"
2824 sodipodi:nodetypes="cccccccccccccccccccccc"
2825 clip-path="url(#clipPath3631)"
2826 transform="translate(276,136)" />
2827 <path
2828 transform="translate(450.03125,73.843964)"
2829 style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter3898);enable-background:new"
2830 d="M 584,696.5 L 577.4375,713.65625 C 577.4375,713.65625 569.62598,734.02113 561.75,757.3125 C 557.81201,768.95818 553.86698,781.35395 550.8125,792.4375 C 547.75802,803.52105 545.47664,812.81736 545.3125,820.71875 C 544.91443,839.88071 551.05903,855.60705 554.25,862.46875 C 553.47847,866.02398 552.25863,871.92307 550.90625,880.5625 C 548.98583,892.83071 547.18798,907.71691 548.53125,920.4375 C 549.91334,933.52585 555.34347,948.62515 561.125,963.46875 C 566.90653,978.31235 573.17935,992.69586 576.34375,1001.5 C 582.97581,1019.9519 586.33671,1033.0763 587.65625,1050 C 588.2376,1057.4561 587.41398,1070.336 586.40625,1083.375 C 585.39852,1096.414 584.21964,1109.6764 584.71875,1120.3438 C 585.70786,1141.4836 594.04673,1167.1785 618.09375,1178.2812 C 640.86858,1188.7966 673.42057,1189.9834 701.53125,1174.8438 C 717.69117,1166.1404 731.60759,1147.7462 744.90625,1127.9375 C 758.20491,1108.1288 769.87542,1086.8841 776.84375,1073.0312 C 792.19667,1042.51 816.23728,957.56702 822.46875,920.3125 C 825.48734,902.26597 826.39041,891.24695 825.09375,882.28125 C 824.11522,875.51521 821.26556,870.13385 818.21875,866.0625 C 820.26149,838.55459 817.48668,814.69372 830.1875,786.65625 L 840.75,763.375 L 816.9375,772.6875 C 799.44775,779.52503 788.03586,791.73286 780.34375,804.75 C 780.02124,805.29577 779.78061,805.85776 779.46875,806.40625 C 779.69078,783.89104 783.87659,768.76866 786.0625,747.71875 L 788.03125,728.71875 L 771,737.375 C 740.40551,752.93071 725.30511,785.56821 721.28125,827.59375 C 717.03593,826.96828 712.44985,826.5741 707.46875,826.75 C 707.17726,787.56964 707.07246,759.71315 716.0625,727.375 L 721.65625,707.25 L 702.21875,714.90625 C 671.30938,727.11019 654.35921,756.83698 645.59375,783.28125 C 641.21102,796.50339 638.84793,809.08246 638,819.21875 C 637.76797,821.99248 637.68894,824.53007 637.6875,826.9375 C 634.44563,826.90109 631.26698,827.07339 627.625,827.4375 C 627.66662,788.43277 624.14076,747.68335 595.34375,710.9375 L 584,696.5 z M 589.8125,740.3125 C 606.61941,770.95633 607.28701,804.27978 606.75,840.0625 L 606.53125,855.125 L 618.56618,848.58579 C 627.22823,845.45277 638.12676,848.35827 650.5,847.75 L 665.17465,857.1066 L 658.84375,831.3125 C 658.7541,831.08253 658.62329,830.89581 658.59375,830.59375 C 658.39424,828.55389 658.37143,825.12068 658.71875,820.96875 C 659.41339,812.66489 661.50832,801.38351 665.34375,789.8125 C 670.49907,774.25956 678.83176,758.62002 690.46875,747.28125 C 685.78494,775.91923 687.25316,807.54059 687.45711,843.08639 L 684.69118,856.34803 L 700.1875,848.75 C 709.2169,845.99229 717.37647,848.40004 729.46875,849.84375 L 742.71507,859.28798 L 741.09375,840 C 742.54168,809.02823 749.31524,786.32192 761.8125,771.125 C 758.82562,790.90384 756.38207,812.9098 762.125,849.46875 L 763.19052,855.84193 L 760.25237,867.35878 L 770.86948,859.1906 L 780.05921,869.41258 L 778.51093,858.94898 L 781.9375,852 C 787.63852,838.78851 792.11032,825.78663 798.28125,815.34375 C 799.24111,813.71941 800.31278,812.27939 801.34375,810.78125 C 797.66309,831.9366 798.91659,850.9894 797.25,868.5625 L 792.56986,876.36948 L 799.96875,876.59375 C 803.1888,880.07736 803.83625,880.44443 804.53125,885.25 C 805.22625,890.05557 804.84987,899.65035 801.96875,916.875 C 796.40076,950.16292 770.17603,1040.0409 758.28125,1063.6875 C 751.67634,1076.8179 740.25127,1097.5832 727.65625,1116.3438 C 715.06123,1135.1043 700.29692,1151.8776 691.65625,1156.5312 C 670.16573,1168.1054 642.87545,1166.7914 626.8125,1159.375 C 612.43764,1152.738 606.27438,1135.9256 605.5,1119.375 C 605.11689,1111.187 606.11279,1098.0658 607.125,1084.9688 C 608.13721,1071.8717 618.41391,1062.398 622.54839,1048.4062 C 627.92068,1030.2254 621.10152,1011.8118 610.04839,994.46875 C 603.56184,984.29097 586.07159,970.21085 580.5,955.90625 C 574.92841,941.60165 570.13249,926.9031 569.21875,918.25 C 568.29254,909.47887 569.64125,895.22498 571.4375,883.75 C 573.23375,872.27503 575.28125,863.46875 575.28125,863.46875 L 584.70403,859.85355 L 574.21875,855.96875 C 574.21875,855.96875 565.71986,840.65865 566.125,821.15625 C 566.19611,817.73309 567.96126,808.4282 570.84375,797.96875 C 573.72624,787.5093 577.60841,775.41604 581.46875,764 C 584.51314,754.99692 587.24938,747.39655 589.8125,740.3125 z"
2831 id="path3669"
2832 clip-path="url(#clipPath3677)"
2833 sodipodi:nodetypes="ccssscsssssssssssssccccscccccccccsscccccccccccssscccccccccccccccsccccssssssssssssscccsssc" />
2834 <g
2835 id="g3628"
2836 clip-path="url(#clipPath3636)"
2837 transform="translate(276,136)">
2838 <path
2839 id="path8988"
2840 d="M 824.48651,818.48242 L 774.98903,802.92607 L 748.11898,855.25197 L 789.13117,900.50681 L 838.62864,862.32304 L 824.48651,818.48242 z"
2841 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9024);enable-background:accumulate" />
2842 <path
2843 id="path8990"
2844 d="M 964.49365,855.25197 L 909.33932,808.58293 L 865.4987,845.35248 L 899.43983,899.09259 L 906.51089,965.56063 L 855.59921,1000.916 L 814.58701,1012.2297 L 811.75859,1058.8987 L 868.32713,1084.3546 L 931.96674,1007.987 L 956.00837,913.23473 L 964.49365,855.25197 z"
2845 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9020);enable-background:accumulate" />
2846 </g>
2847 <path
2848 style="opacity:0.25;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter9044);enable-background:accumulate"
2849 d="M 1045.3322,1043.5779 L 1106.1434,1016.7078 L 1206.5525,1047.8205 L 1142.9129,1078.9332 L 1060.8885,1061.9626 L 1045.3322,1043.5779 z"
2850 id="path8992" />
2851 <path
2852 transform="translate(450.03125,73.843964)"
2853 style="opacity:0.58775509;fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:20.79999924;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;display:inline;filter:url(#filter4185);enable-background:new"
2854 d="M 583.0625,715.75 C 570.95641,750.19974 556.348,784.28333 551.3125,820.59375 C 550.48042,835.52242 555.90165,849.75318 560.15625,863.65625 C 554.24001,890.85751 550.01944,920.5562 561.3125,946.78125 C 574.82967,984.9421 596.31397,1022.4634 593.73529,1064.2495 C 592.78699,1093.5437 584.72085,1125.2436 599.125,1152.5312 C 609.32364,1171.866 632.26456,1179.8429 653.09285,1180.1988 C 680.95504,1181.3729 709.55546,1168.5772 725.09375,1144.9375 C 747.68924,1115.5658 766.89426,1083.4402 780.3324,1048.8777 C 797.22293,1003.3717 810.0042,956.31698 818.26642,908.4788 C 820.09082,895.53774 821.3675,881.00895 813.6875,869.65625 C 810.25635,862.31993 813.72957,854.09611 813.00293,846.34648 C 813.67693,821.35182 817.01525,795.68272 829.65625,773.75 C 811.92312,780.1946 794.58357,790.30971 785.65318,807.61425 C 781.7181,814.3238 778.04836,821.18838 774.28125,828 C 770.73126,797.98592 778.00088,768.35172 781.0625,738.71875 C 760.89646,747.77338 744.18578,764.37397 736.88755,785.40075 C 730.58292,800.98078 728.08533,817.71793 726.625,834.4375 C 718.37166,832.91825 709.94053,832.33595 701.5625,832.9375 C 700.59942,794.23963 701.09554,753.53035 712.53125,717.03125 C 693.85012,723.24901 677.36504,735.76676 666.90322,752.41848 C 653.05068,773.29827 645.64182,798.17243 643.84375,823.03125 C 644.42909,827.35579 643.78249,834.87134 637.5,832.90625 C 632.16882,832.9238 626.87092,833.58508 621.5625,834 C 622.71034,794.61852 618.22106,752.3718 594.5,719.78125 C 591.43929,716.14408 588.86315,712.09687 585.875,708.4375 C 584.9375,710.875 584,713.3125 583.0625,715.75 z M 590.8125,729.59375 C 609.37777,758.89004 613.295,794.41387 612.9375,828.46875 C 613.14159,833.64401 612.42094,840.29795 613.0625,844.53125 C 625.38106,838.4285 639.80162,842.09135 652.84375,842.34375 C 655.16087,843.567 656.03585,843.99618 654.75,840.9375 C 650.58545,826.98465 652.90172,812.3245 656.55504,798.52986 C 662.92191,772.23922 677.18332,747.44188 699.375,731.5 C 690.75791,768.73706 693.65842,808.06161 693.28125,845.46875 C 705.53469,838.55885 720.56004,842.02262 733.3125,845.21875 C 736.70472,848.75355 735.60185,844.48927 735.5,841.40625 C 735.01691,820.03567 739.63133,798.33662 749.1875,779.25 C 755.15016,768.56273 763.43088,759.44621 771.625,750.375 C 763.75344,784.2131 762.4221,819.71093 768.90625,853.875 C 770.6311,852.46382 773.51306,853.42086 774.5625,853.5 C 784.24619,832.26318 790.91362,808.11938 809.45266,792.75815 C 811.32595,792.38693 808.00448,801.2831 807.96875,804.65625 C 804.43387,826.50206 804.67155,848.82948 803.0625,870.90625 C 801.75012,872.28304 805.91085,873.22979 806.53125,874.96875 C 814.45436,885.53455 809.65419,899.80024 808.62285,911.74886 C 801.5588,956.78241 786.85732,1000.1282 773.04608,1043.446 C 760.97132,1076.3953 742.32638,1106.526 721.8125,1134.875 C 710.85993,1148.4264 698.56778,1162.7263 680.96875,1167.375 C 660.81274,1173.6163 637.3928,1172.5494 619,1161.7188 C 601.71034,1149.3774 597.97607,1126.0099 599.73774,1106.0324 C 599.78653,1090.2062 602.10985,1078.2316 607.65521,1063.2271 C 613.20056,1048.2226 610.12626,1031.8954 605.875,1015.5938 C 601.62374,999.2922 593.69597,989.33378 584.05342,973.38963 C 574.41087,957.44548 566.41652,937.35229 563.28125,917.8125 C 561.53177,899.18536 566.17296,880.68988 569.0625,862.5625 C 572.35873,859.72554 567.46451,857.36591 566.75,854.375 C 559.14887,837.35992 558.34253,817.6001 564.00766,799.81502 C 571.13786,774.74272 579.76853,750.18261 588.6875,725.6875 C 589.39583,726.98958 590.10417,728.29167 590.8125,729.59375 z"
2855 id="path4149"
2856 sodipodi:nodetypes="ccccccccccccccccccccccccccccccccccccccccccccccccccczzzcccccc"
2857 clip-path="url(#clipPath4177)" />
2858 <path
2859 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4130);enable-background:accumulate"
2860 d="M 735.05635,733.03834 L 737.81177,754.12715 L 782.2228,738.73894 L 787.07343,716.34919 L 783.13726,694.29697 L 760.68563,657.70396 L 752.40559,688.0089 L 735.05635,733.03834 z"
2861 id="path3902"
2862 sodipodi:nodetypes="cccccccc"
2863 clip-path="url(#clipPath3631)"
2864 transform="translate(276,136)" />
2865 <path
2866 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;filter:url(#filter4141);enable-background:accumulate"
2867 d="M 831.81321,730.29452 L 847.63558,745.19938 L 868.49031,748.09932 L 866.90002,708.17334 L 875.22563,677.66492 L 868.06064,671.32386 L 846.36395,692.26626 L 831.81321,730.29452 z"
2868 id="path4135"
2869 sodipodi:nodetypes="cccccccc"
2870 clip-path="url(#clipPath3631)"
2871 transform="translate(276,136)" />
2872 <g
2873 id="g8367"
2874 style="filter:url(#filter8379)"
2875 clip-path="url(#clipPath8392)"
2876 transform="translate(276,136)">
2877 <path
2878 clip-path="none"
2879 sodipodi:nodetypes="ccccccc"
2880 id="path4145"
2881 d="M 910.14441,746.31415 L 942.75736,751.48808 L 942.39617,727.61189 L 949.5847,697.92968 L 941.13358,692.66603 L 919.31164,719.1768 L 910.14441,746.31415 z"
2882 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2883 <rect
2884 y="650.19098"
2885 x="877.51953"
2886 height="172.53406"
2887 width="123.03658"
2888 id="rect8365"
2889 style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2890 </g>
2891 <g
2892 id="g8400"
2893 style="filter:url(#filter8404)"
2894 clip-path="url(#clipPath8417)"
2895 transform="translate(276,136)">
2896 <path
2897 clip-path="none"
2898 sodipodi:nodetypes="ccccc"
2899 id="path4147"
2900 d="M 964.00012,754.69487 L 982.42893,762.15966 L 991.5,725.19519 L 976.62969,730.03405 L 964.00012,754.69487 z"
2901 style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2902 <rect
2903 y="677.06104"
2904 x="924.89569"
2905 height="125.1579"
2906 width="142.12846"
2907 id="rect8398"
2908 style="opacity:1;fill:none;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:25;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible;enable-background:accumulate" />
2909 </g>
2910 </g>
2911</svg>
diff --git a/Documentation/logo.txt b/Documentation/logo.txt
index a2e62445e28e..296f0f7f67eb 100644
--- a/Documentation/logo.txt
+++ b/Documentation/logo.txt
@@ -1,4 +1,13 @@
1Tux is taking a three month sabbatical to work as a barber, so Tuz is 1This is the full-colour version of the currently unofficial Linux logo
2standing in. He's taken pains to ensure you'll hardly notice. 2("currently unofficial" just means that there has been no paperwork and
3that I have not really announced it yet). It was created by Larry Ewing,
4and is freely usable as long as you acknowledge Larry as the original
5artist.
6
7Note that there are black-and-white versions of this available that
8scale down to smaller sizes and are better for letterheads or whatever
9you want to use it for: for the full range of logos take a look at
10Larry's web-page:
11
12 http://www.isc.tamu.edu/~lewing/linux/
3 13
4Image by Andrew McGown and Josh Bush. Image is licensed CC BY-SA.
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 1da9d1b1793f..4edd39ec7db9 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -164,15 +164,19 @@ All md devices contain:
164 raid_disks 164 raid_disks
165 a text file with a simple number indicating the number of devices 165 a text file with a simple number indicating the number of devices
166 in a fully functional array. If this is not yet known, the file 166 in a fully functional array. If this is not yet known, the file
167 will be empty. If an array is being resized (not currently 167 will be empty. If an array is being resized this will contain
168 possible) this will contain the larger of the old and new sizes. 168 the new number of devices.
169 Some raid level (RAID1) allow this value to be set while the 169 Some raid levels allow this value to be set while the array is
170 array is active. This will reconfigure the array. Otherwise 170 active. This will reconfigure the array. Otherwise it can only
171 it can only be set while assembling an array. 171 be set while assembling an array.
172 A change to this attribute will not be permitted if it would
173 reduce the size of the array. To reduce the number of drives
174 in an e.g. raid5, the array size must first be reduced by
175 setting the 'array_size' attribute.
172 176
173 chunk_size 177 chunk_size
174 This is the size if bytes for 'chunks' and is only relevant to 178 This is the size in bytes for 'chunks' and is only relevant to
175 raid levels that involve striping (1,4,5,6,10). The address space 179 raid levels that involve striping (0,4,5,6,10). The address space
176 of the array is conceptually divided into chunks and consecutive 180 of the array is conceptually divided into chunks and consecutive
177 chunks are striped onto neighbouring devices. 181 chunks are striped onto neighbouring devices.
178 The size should be at least PAGE_SIZE (4k) and should be a power 182 The size should be at least PAGE_SIZE (4k) and should be a power
@@ -183,6 +187,20 @@ All md devices contain:
183 simply a number that is interpretted differently by different 187 simply a number that is interpretted differently by different
184 levels. It can be written while assembling an array. 188 levels. It can be written while assembling an array.
185 189
190 array_size
191 This can be used to artificially constrain the available space in
192 the array to be less than is actually available on the combined
193 devices. Writing a number (in Kilobytes) which is less than
194 the available size will set the size. Any reconfiguration of the
195 array (e.g. adding devices) will not cause the size to change.
196 Writing the word 'default' will cause the effective size of the
197 array to be whatever size is actually available based on
198 'level', 'chunk_size' and 'component_size'.
199
200 This can be used to reduce the size of the array before reducing
201 the number of devices in a raid4/5/6, or to support external
202 metadata formats which mandate such clipping.
203
186 reshape_position 204 reshape_position
187 This is either "none" or a sector number within the devices of 205 This is either "none" or a sector number within the devices of
188 the array where "reshape" is up to. If this is set, the three 206 the array where "reshape" is up to. If this is set, the three
@@ -207,6 +225,11 @@ All md devices contain:
207 about the array. It can be 0.90 (traditional format), 1.0, 1.1, 225 about the array. It can be 0.90 (traditional format), 1.0, 1.1,
208 1.2 (newer format in varying locations) or "none" indicating that 226 1.2 (newer format in varying locations) or "none" indicating that
209 the kernel isn't managing metadata at all. 227 the kernel isn't managing metadata at all.
228 Alternately it can be "external:" followed by a string which
229 is set by user-space. This indicates that metadata is managed
230 by a user-space program. Any device failure or other event that
231 requires a metadata update will cause array activity to be
232 suspended until the event is acknowledged.
210 233
211 resync_start 234 resync_start
212 The point at which resync should start. If no resync is needed, 235 The point at which resync should start. If no resync is needed,
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127f54ac..7f5809eddee6 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@ Contents:
31 31
32 - Locking functions. 32 - Locking functions.
33 - Interrupt disabling functions. 33 - Interrupt disabling functions.
34 - Sleep and wake-up functions.
34 - Miscellaneous functions. 35 - Miscellaneous functions.
35 36
36 (*) Inter-CPU locking barrier effects. 37 (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
1217other means. 1218other means.
1218 1219
1219 1220
1221SLEEP AND WAKE-UP FUNCTIONS
1222---------------------------
1223
1224Sleeping and waking on an event flagged in global data can be viewed as an
1225interaction between two pieces of data: the task state of the task waiting for
1226the event and the global data used to indicate the event. To make sure that
1227these appear to happen in the right order, the primitives to begin the process
1228of going to sleep, and the primitives to initiate a wake up imply certain
1229barriers.
1230
1231Firstly, the sleeper normally follows something like this sequence of events:
1232
1233 for (;;) {
1234 set_current_state(TASK_UNINTERRUPTIBLE);
1235 if (event_indicated)
1236 break;
1237 schedule();
1238 }
1239
1240A general memory barrier is interpolated automatically by set_current_state()
1241after it has altered the task state:
1242
1243 CPU 1
1244 ===============================
1245 set_current_state();
1246 set_mb();
1247 STORE current->state
1248 <general barrier>
1249 LOAD event_indicated
1250
1251set_current_state() may be wrapped by:
1252
1253 prepare_to_wait();
1254 prepare_to_wait_exclusive();
1255
1256which therefore also imply a general memory barrier after setting the state.
1257The whole sequence above is available in various canned forms, all of which
1258interpolate the memory barrier in the right place:
1259
1260 wait_event();
1261 wait_event_interruptible();
1262 wait_event_interruptible_exclusive();
1263 wait_event_interruptible_timeout();
1264 wait_event_killable();
1265 wait_event_timeout();
1266 wait_on_bit();
1267 wait_on_bit_lock();
1268
1269
1270Secondly, code that performs a wake up normally follows something like this:
1271
1272 event_indicated = 1;
1273 wake_up(&event_wait_queue);
1274
1275or:
1276
1277 event_indicated = 1;
1278 wake_up_process(event_daemon);
1279
1280A write memory barrier is implied by wake_up() and co. if and only if they wake
1281something up. The barrier occurs before the task state is cleared, and so sits
1282between the STORE to indicate the event and the STORE to set TASK_RUNNING:
1283
1284 CPU 1 CPU 2
1285 =============================== ===============================
1286 set_current_state(); STORE event_indicated
1287 set_mb(); wake_up();
1288 STORE current->state <write barrier>
1289 <general barrier> STORE current->state
1290 LOAD event_indicated
1291
1292The available waker functions include:
1293
1294 complete();
1295 wake_up();
1296 wake_up_all();
1297 wake_up_bit();
1298 wake_up_interruptible();
1299 wake_up_interruptible_all();
1300 wake_up_interruptible_nr();
1301 wake_up_interruptible_poll();
1302 wake_up_interruptible_sync();
1303 wake_up_interruptible_sync_poll();
1304 wake_up_locked();
1305 wake_up_locked_poll();
1306 wake_up_nr();
1307 wake_up_poll();
1308 wake_up_process();
1309
1310
1311[!] Note that the memory barriers implied by the sleeper and the waker do _not_
1312order multiple stores before the wake-up with respect to loads of those stored
1313values after the sleeper has called set_current_state(). For instance, if the
1314sleeper does:
1315
1316 set_current_state(TASK_INTERRUPTIBLE);
1317 if (event_indicated)
1318 break;
1319 __set_current_state(TASK_RUNNING);
1320 do_something(my_data);
1321
1322and the waker does:
1323
1324 my_data = value;
1325 event_indicated = 1;
1326 wake_up(&event_wait_queue);
1327
1328there's no guarantee that the change to event_indicated will be perceived by
1329the sleeper as coming after the change to my_data. In such a circumstance, the
1330code on both sides must interpolate its own memory barriers between the
1331separate data accesses. Thus the above sleeper ought to do:
1332
1333 set_current_state(TASK_INTERRUPTIBLE);
1334 if (event_indicated) {
1335 smp_rmb();
1336 do_something(my_data);
1337 }
1338
1339and the waker should do:
1340
1341 my_data = value;
1342 smp_wmb();
1343 event_indicated = 1;
1344 wake_up(&event_wait_queue);
1345
1346
1220MISCELLANEOUS FUNCTIONS 1347MISCELLANEOUS FUNCTIONS
1221----------------------- 1348-----------------------
1222 1349
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
1366 1493
1367Under normal operation, memory operation reordering is generally not going to 1494Under normal operation, memory operation reordering is generally not going to
1368be a problem as a single-threaded linear piece of code will still appear to 1495be a problem as a single-threaded linear piece of code will still appear to
1369work correctly, even if it's in an SMP kernel. There are, however, three 1496work correctly, even if it's in an SMP kernel. There are, however, four
1370circumstances in which reordering definitely _could_ be a problem: 1497circumstances in which reordering definitely _could_ be a problem:
1371 1498
1372 (*) Interprocessor interaction. 1499 (*) Interprocessor interaction.
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 4c2ecf537a4a..bbc8a6a36921 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -73,13 +73,13 @@ this phase is triggered automatically. ACPI can notify this event. If not,
73(see Section 4.). 73(see Section 4.).
74 74
75Logical Memory Hotplug phase is to change memory state into 75Logical Memory Hotplug phase is to change memory state into
76avaiable/unavailable for users. Amount of memory from user's view is 76available/unavailable for users. Amount of memory from user's view is
77changed by this phase. The kernel makes all memory in it as free pages 77changed by this phase. The kernel makes all memory in it as free pages
78when a memory range is available. 78when a memory range is available.
79 79
80In this document, this phase is described as online/offline. 80In this document, this phase is described as online/offline.
81 81
82Logical Memory Hotplug phase is triggred by write of sysfs file by system 82Logical Memory Hotplug phase is triggered by write of sysfs file by system
83administrator. For the hot-add case, it must be executed after Physical Hotplug 83administrator. For the hot-add case, it must be executed after Physical Hotplug
84phase by hand. 84phase by hand.
85(However, if you writes udev's hotplug scripts for memory hotplug, these 85(However, if you writes udev's hotplug scripts for memory hotplug, these
@@ -334,7 +334,7 @@ MEMORY_CANCEL_ONLINE
334 Generated if MEMORY_GOING_ONLINE fails. 334 Generated if MEMORY_GOING_ONLINE fails.
335 335
336MEMORY_ONLINE 336MEMORY_ONLINE
337 Generated when memory has succesfully brought online. The callback may 337 Generated when memory has successfully brought online. The callback may
338 allocate pages from the new memory. 338 allocate pages from the new memory.
339 339
340MEMORY_GOING_OFFLINE 340MEMORY_GOING_OFFLINE
@@ -359,7 +359,7 @@ The third argument is passed by pointer of struct memory_notify.
359struct memory_notify { 359struct memory_notify {
360 unsigned long start_pfn; 360 unsigned long start_pfn;
361 unsigned long nr_pages; 361 unsigned long nr_pages;
362 int status_cahnge_nid; 362 int status_change_nid;
363} 363}
364 364
365start_pfn is start_pfn of online/offline memory. 365start_pfn is start_pfn of online/offline memory.
diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003
new file mode 100644
index 000000000000..c4ff5f38e010
--- /dev/null
+++ b/Documentation/misc-devices/isl29003
@@ -0,0 +1,62 @@
1Kernel driver isl29003
2=====================
3
4Supported chips:
5* Intersil ISL29003
6Prefix: 'isl29003'
7Addresses scanned: none
8Datasheet:
9http://www.intersil.com/data/fn/fn7464.pdf
10
11Author: Daniel Mack <daniel@caiaq.de>
12
13
14Description
15-----------
16The ISL29003 is an integrated light sensor with a 16-bit integrating type
17ADC, I2C user programmable lux range select for optimized counts/lux, and
18I2C multi-function control and monitoring capabilities. The internal ADC
19provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by
20artificial light sources.
21
22The driver allows to set the lux range, the bit resolution, the operational
23mode (see below) and the power state of device and can read the current lux
24value, of course.
25
26
27Detection
28---------
29
30The ISL29003 does not have an ID register which could be used to identify
31it, so the detection routine will just try to read from the configured I2C
32addess and consider the device to be present as soon as it ACKs the
33transfer.
34
35
36Sysfs entries
37-------------
38
39range:
40 0: 0 lux to 1000 lux (default)
41 1: 0 lux to 4000 lux
42 2: 0 lux to 16,000 lux
43 3: 0 lux to 64,000 lux
44
45resolution:
46 0: 2^16 cycles (default)
47 1: 2^12 cycles
48 2: 2^8 cycles
49 3: 2^4 cycles
50
51mode:
52 0: diode1's current (unsigned 16bit) (default)
53 1: diode1's current (unsigned 16bit)
54 2: difference between diodes (l1 - l2, signed 15bit)
55
56power_state:
57 0: device is disabled (default)
58 1: device is enabled
59
60lux (read only):
61 returns the value from the last sensor reading
62
diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt
index 1fef1f06dfd2..d3507bad428d 100644
--- a/Documentation/mn10300/ABI.txt
+++ b/Documentation/mn10300/ABI.txt
@@ -26,7 +26,7 @@ registers and the stack. If the first argument is a 64-bit value, it will be
26passed in D0:D1. If the first argument is not a 64-bit value, but the second 26passed in D0:D1. If the first argument is not a 64-bit value, but the second
27is, the second will be passed entirely on the stack and D1 will be unused. 27is, the second will be passed entirely on the stack and D1 will be unused.
28 28
29Arguments smaller than 32-bits are not coelesced within a register or a stack 29Arguments smaller than 32-bits are not coalesced within a register or a stack
30word. For example, two byte-sized arguments will always be passed in separate 30word. For example, two byte-sized arguments will always be passed in separate
31registers or word-sized stack slots. 31registers or word-sized stack slots.
32 32
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index bdf93b7f0f24..274821b35a7f 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -50,7 +50,7 @@ byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
50 cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 50 cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
51 51
52This figure represents a sector of 256 bytes. 52This figure represents a sector of 256 bytes.
53cp is my abbreviaton for column parity, rp for row parity. 53cp is my abbreviation for column parity, rp for row parity.
54 54
55Let's start to explain column parity. 55Let's start to explain column parity.
56cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. 56cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
@@ -560,7 +560,7 @@ Measuring this code again showed big gain. When executing the original
560linux code 1 million times, this took about 1 second on my system. 560linux code 1 million times, this took about 1 second on my system.
561(using time to measure the performance). After this iteration I was back 561(using time to measure the performance). After this iteration I was back
562to 0.075 sec. Actually I had to decide to start measuring over 10 562to 0.075 sec. Actually I had to decide to start measuring over 10
563million interations in order not to loose too much accuracy. This one 563million iterations in order not to lose too much accuracy. This one
564definitely seemed to be the jackpot! 564definitely seemed to be the jackpot!
565 565
566There is a little bit more room for improvement though. There are three 566There is a little bit more room for improvement though. There are three
@@ -571,8 +571,8 @@ loop; This eliminates 3 statements per loop. Of course after the loop we
571need to correct by adding: 571need to correct by adding:
572 rp4 ^= rp4_6; 572 rp4 ^= rp4_6;
573 rp6 ^= rp4_6 573 rp6 ^= rp4_6
574Furthermore there are 4 sequential assingments to rp8. This can be 574Furthermore there are 4 sequential assignments to rp8. This can be
575encoded slightly more efficient by saving tmppar before those 4 lines 575encoded slightly more efficiently by saving tmppar before those 4 lines
576and later do rp8 = rp8 ^ tmppar ^ notrp8; 576and later do rp8 = rp8 ^ tmppar ^ notrp8;
577(where notrp8 is the value of rp8 before those 4 lines). 577(where notrp8 is the value of rp8 before those 4 lines).
578Again a use of the commutative property of xor. 578Again a use of the commutative property of xor.
@@ -622,7 +622,7 @@ Not a big change, but every penny counts :-)
622Analysis 7 622Analysis 7
623========== 623==========
624 624
625Acutally this made things worse. Not very much, but I don't want to move 625Actually this made things worse. Not very much, but I don't want to move
626into the wrong direction. Maybe something to investigate later. Could 626into the wrong direction. Maybe something to investigate later. Could
627have to do with caching again. 627have to do with caching again.
628 628
@@ -642,7 +642,7 @@ Analysis 8
642This makes things worse. Let's stick with attempt 6 and continue from there. 642This makes things worse. Let's stick with attempt 6 and continue from there.
643Although it seems that the code within the loop cannot be optimised 643Although it seems that the code within the loop cannot be optimised
644further there is still room to optimize the generation of the ecc codes. 644further there is still room to optimize the generation of the ecc codes.
645We can simply calcualate the total parity. If this is 0 then rp4 = rp5 645We can simply calculate the total parity. If this is 0 then rp4 = rp5
646etc. If the parity is 1, then rp4 = !rp5; 646etc. If the parity is 1, then rp4 = !rp5;
647But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits 647But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
648in the result byte and then do something like 648in the result byte and then do something like
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 5ede7473b425..d5181ce9ff62 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -221,7 +221,7 @@ ad_select
221 221
222 - Any slave's 802.3ad association state changes 222 - Any slave's 802.3ad association state changes
223 223
224 - The bond's adminstrative state changes to up 224 - The bond's administrative state changes to up
225 225
226 count or 2 226 count or 2
227 227
@@ -369,7 +369,7 @@ fail_over_mac
369 When this policy is used in conjuction with the mii 369 When this policy is used in conjuction with the mii
370 monitor, devices which assert link up prior to being 370 monitor, devices which assert link up prior to being
371 able to actually transmit and receive are particularly 371 able to actually transmit and receive are particularly
372 susecptible to loss of the gratuitous ARP, and an 372 susceptible to loss of the gratuitous ARP, and an
373 appropriate updelay setting may be required. 373 appropriate updelay setting may be required.
374 374
375 follow or 2 375 follow or 2
@@ -1242,7 +1242,7 @@ monitoring is enabled, and vice-versa.
1242To add ARP targets: 1242To add ARP targets:
1243# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target 1243# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
1244# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target 1244# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
1245 NOTE: up to 10 target addresses may be specified. 1245 NOTE: up to 16 target addresses may be specified.
1246 1246
1247To remove an ARP target: 1247To remove an ARP target:
1248# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target 1248# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
@@ -1794,7 +1794,7 @@ target to query.
1794generally referred to as "trunk failover." This is a feature of the 1794generally referred to as "trunk failover." This is a feature of the
1795switch that causes the link state of a particular switch port to be set 1795switch that causes the link state of a particular switch port to be set
1796down (or up) when the state of another switch port goes down (or up). 1796down (or up) when the state of another switch port goes down (or up).
1797It's purpose is to propogate link failures from logically "exterior" ports 1797Its purpose is to propagate link failures from logically "exterior" ports
1798to the logically "interior" ports that bonding is able to monitor via 1798to the logically "interior" ports that bonding is able to monitor via
1799miimon. Availability and configuration for trunk failover varies by 1799miimon. Availability and configuration for trunk failover varies by
1800switch, but this can be a viable alternative to the ARP monitor when using 1800switch, but this can be a viable alternative to the ARP monitor when using
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 2035bc4932f2..cd79735013f9 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -36,10 +36,15 @@ This file contains
36 6.2 local loopback of sent frames 36 6.2 local loopback of sent frames
37 6.3 CAN controller hardware filters 37 6.3 CAN controller hardware filters
38 6.4 The virtual CAN driver (vcan) 38 6.4 The virtual CAN driver (vcan)
39 6.5 currently supported CAN hardware 39 6.5 The CAN network device driver interface
40 6.6 todo 40 6.5.1 Netlink interface to set/get devices properties
41 6.5.2 Setting the CAN bit-timing
42 6.5.3 Starting and stopping the CAN network device
43 6.6 supported CAN hardware
41 44
42 7 Credits 45 7 Socket CAN resources
46
47 8 Credits
43 48
44============================================================================ 49============================================================================
45 50
@@ -234,6 +239,8 @@ solution for a couple of reasons:
234 the user application using the common CAN filter mechanisms. Inside 239 the user application using the common CAN filter mechanisms. Inside
235 this filter definition the (interested) type of errors may be 240 this filter definition the (interested) type of errors may be
236 selected. The reception of error frames is disabled by default. 241 selected. The reception of error frames is disabled by default.
242 The format of the CAN error frame is briefly decribed in the Linux
243 header file "include/linux/can/error.h".
237 244
2384. How to use Socket CAN 2454. How to use Socket CAN
239------------------------ 246------------------------
@@ -327,7 +334,7 @@ solution for a couple of reasons:
327 return 1; 334 return 1;
328 } 335 }
329 336
330 /* paraniod check ... */ 337 /* paranoid check ... */
331 if (nbytes < sizeof(struct can_frame)) { 338 if (nbytes < sizeof(struct can_frame)) {
332 fprintf(stderr, "read: incomplete CAN frame\n"); 339 fprintf(stderr, "read: incomplete CAN frame\n");
333 return 1; 340 return 1;
@@ -605,61 +612,213 @@ solution for a couple of reasons:
605 removal of vcan network devices can be managed with the ip(8) tool: 612 removal of vcan network devices can be managed with the ip(8) tool:
606 613
607 - Create a virtual CAN network interface: 614 - Create a virtual CAN network interface:
608 ip link add type vcan 615 $ ip link add type vcan
609 616
610 - Create a virtual CAN network interface with a specific name 'vcan42': 617 - Create a virtual CAN network interface with a specific name 'vcan42':
611 ip link add dev vcan42 type vcan 618 $ ip link add dev vcan42 type vcan
612 619
613 - Remove a (virtual CAN) network interface 'vcan42': 620 - Remove a (virtual CAN) network interface 'vcan42':
614 ip link del vcan42 621 $ ip link del vcan42
615 622
616 The tool 'vcan' from the SocketCAN SVN repository on BerliOS is obsolete. 623 6.5 The CAN network device driver interface
617 624
618 Virtual CAN network device creation in older Kernels: 625 The CAN network device driver interface provides a generic interface
619 In Linux Kernel versions < 2.6.24 the vcan driver creates 4 vcan 626 to setup, configure and monitor CAN network devices. The user can then
620 netdevices at module load time by default. This value can be changed 627 configure the CAN device, like setting the bit-timing parameters, via
621 with the module parameter 'numdev'. E.g. 'modprobe vcan numdev=8' 628 the netlink interface using the program "ip" from the "IPROUTE2"
622 629 utility suite. The following chapter describes briefly how to use it.
623 6.5 currently supported CAN hardware 630 Furthermore, the interface uses a common data structure and exports a
631 set of common functions, which all real CAN network device drivers
632 should use. Please have a look to the SJA1000 or MSCAN driver to
633 understand how to use them. The name of the module is can-dev.ko.
634
635 6.5.1 Netlink interface to set/get devices properties
636
637 The CAN device must be configured via netlink interface. The supported
638 netlink message types are defined and briefly described in
639 "include/linux/can/netlink.h". CAN link support for the program "ip"
640 of the IPROUTE2 utility suite is avaiable and it can be used as shown
641 below:
642
643 - Setting CAN device properties:
644
645 $ ip link set can0 type can help
646 Usage: ip link set DEVICE type can
647 [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
648 [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
649 phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
650
651 [ loopback { on | off } ]
652 [ listen-only { on | off } ]
653 [ triple-sampling { on | off } ]
654
655 [ restart-ms TIME-MS ]
656 [ restart ]
657
658 Where: BITRATE := { 1..1000000 }
659 SAMPLE-POINT := { 0.000..0.999 }
660 TQ := { NUMBER }
661 PROP-SEG := { 1..8 }
662 PHASE-SEG1 := { 1..8 }
663 PHASE-SEG2 := { 1..8 }
664 SJW := { 1..4 }
665 RESTART-MS := { 0 | NUMBER }
666
667 - Display CAN device details and statistics:
668
669 $ ip -details -statistics link show can0
670 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
671 link/can
672 can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
673 bitrate 125000 sample_point 0.875
674 tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
675 sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
676 clock 8000000
677 re-started bus-errors arbit-lost error-warn error-pass bus-off
678 41 17457 0 41 42 41
679 RX: bytes packets errors dropped overrun mcast
680 140859 17608 17457 0 0 0
681 TX: bytes packets errors dropped carrier collsns
682 861 112 0 41 0 0
683
684 More info to the above output:
685
686 "<TRIPLE-SAMPLING>"
687 Shows the list of selected CAN controller modes: LOOPBACK,
688 LISTEN-ONLY, or TRIPLE-SAMPLING.
689
690 "state ERROR-ACTIVE"
691 The current state of the CAN controller: "ERROR-ACTIVE",
692 "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
693
694 "restart-ms 100"
695 Automatic restart delay time. If set to a non-zero value, a
696 restart of the CAN controller will be triggered automatically
697 in case of a bus-off condition after the specified delay time
698 in milliseconds. By default it's off.
699
700 "bitrate 125000 sample_point 0.875"
701 Shows the real bit-rate in bits/sec and the sample-point in the
702 range 0.000..0.999. If the calculation of bit-timing parameters
703 is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
704 bit-timing can be defined by setting the "bitrate" argument.
705 Optionally the "sample-point" can be specified. By default it's
706 0.000 assuming CIA-recommended sample-points.
707
708 "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
709 Shows the time quanta in ns, propagation segment, phase buffer
710 segment 1 and 2 and the synchronisation jump width in units of
711 tq. They allow to define the CAN bit-timing in a hardware
712 independent format as proposed by the Bosch CAN 2.0 spec (see
713 chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
714
715 "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
716 clock 8000000"
717 Shows the bit-timing constants of the CAN controller, here the
718 "sja1000". The minimum and maximum values of the time segment 1
719 and 2, the synchronisation jump width in units of tq, the
720 bitrate pre-scaler and the CAN system clock frequency in Hz.
721 These constants could be used for user-defined (non-standard)
722 bit-timing calculation algorithms in user-space.
723
724 "re-started bus-errors arbit-lost error-warn error-pass bus-off"
725 Shows the number of restarts, bus and arbitration lost errors,
726 and the state changes to the error-warning, error-passive and
727 bus-off state. RX overrun errors are listed in the "overrun"
728 field of the standard network statistics.
729
730 6.5.2 Setting the CAN bit-timing
731
732 The CAN bit-timing parameters can always be defined in a hardware
733 independent format as proposed in the Bosch CAN 2.0 specification
734 specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
735 and "sjw":
736
737 $ ip link set canX type can tq 125 prop-seg 6 \
738 phase-seg1 7 phase-seg2 2 sjw 1
739
740 If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
741 recommended CAN bit-timing parameters will be calculated if the bit-
742 rate is specified with the argument "bitrate":
743
744 $ ip link set canX type can bitrate 125000
745
746 Note that this works fine for the most common CAN controllers with
747 standard bit-rates but may *fail* for exotic bit-rates or CAN system
748 clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
749 space and allows user-space tools to solely determine and set the
750 bit-timing parameters. The CAN controller specific bit-timing
751 constants can be used for that purpose. They are listed by the
752 following command:
753
754 $ ip -details link show can0
755 ...
756 sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
757
758 6.5.3 Starting and stopping the CAN network device
759
760 A CAN network device is started or stopped as usual with the command
761 "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
762 you *must* define proper bit-timing parameters for real CAN devices
763 before you can start it to avoid error-prone default settings:
764
765 $ ip link set canX up type can bitrate 125000
766
767 A device may enter the "bus-off" state if too much errors occurred on
768 the CAN bus. Then no more messages are received or sent. An automatic
769 bus-off recovery can be enabled by setting the "restart-ms" to a
770 non-zero value, e.g.:
771
772 $ ip link set canX type can restart-ms 100
773
774 Alternatively, the application may realize the "bus-off" condition
775 by monitoring CAN error frames and do a restart when appropriate with
776 the command:
777
778 $ ip link set canX type can restart
779
780 Note that a restart will also create a CAN error frame (see also
781 chapter 3.4).
624 782
625 On the project website http://developer.berlios.de/projects/socketcan 783 6.6 Supported CAN hardware
626 there are different drivers available:
627 784
628 vcan: Virtual CAN interface driver (if no real hardware is available) 785 Please check the "Kconfig" file in "drivers/net/can" to get an actual
629 sja1000: Philips SJA1000 CAN controller (recommended) 786 list of the support CAN hardware. On the Socket CAN project website
630 i82527: Intel i82527 CAN controller 787 (see chapter 7) there might be further drivers available, also for
631 mscan: Motorola/Freescale CAN controller (e.g. inside SOC MPC5200) 788 older kernel versions.
632 ccan: CCAN controller core (e.g. inside SOC h7202)
633 slcan: For a bunch of CAN adaptors that are attached via a
634 serial line ASCII protocol (for serial / USB adaptors)
635 789
636 Additionally the different CAN adaptors (ISA/PCI/PCMCIA/USB/Parport) 7907. Socket CAN resources
637 from PEAK Systemtechnik support the CAN netdevice driver model 791-----------------------
638 since Linux driver v6.0: http://www.peak-system.com/linux/index.htm
639 792
640 Please check the Mailing Lists on the berlios OSS project website. 793 You can find further resources for Socket CAN like user space tools,
794 support for old kernel versions, more drivers, mailing lists, etc.
795 at the BerliOS OSS project website for Socket CAN:
641 796
642 6.6 todo 797 http://developer.berlios.de/projects/socketcan
643 798
644 The configuration interface for CAN network drivers is still an open 799 If you have questions, bug fixes, etc., don't hesitate to post them to
645 issue that has not been finalized in the socketcan project. Also the 800 the Socketcan-Users mailing list. But please search the archives first.
646 idea of having a library module (candev.ko) that holds functions
647 that are needed by all CAN netdevices is not ready to ship.
648 Your contribution is welcome.
649 801
6507. Credits 8028. Credits
651---------- 803----------
652 804
653 Oliver Hartkopp (PF_CAN core, filters, drivers, bcm) 805 Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
654 Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan) 806 Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
655 Jan Kizka (RT-SocketCAN core, Socket-API reconciliation) 807 Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
656 Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews) 808 Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
809 CAN device driver interface, MSCAN driver)
657 Robert Schwebel (design reviews, PTXdist integration) 810 Robert Schwebel (design reviews, PTXdist integration)
658 Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers) 811 Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
659 Benedikt Spranger (reviews) 812 Benedikt Spranger (reviews)
660 Thomas Gleixner (LKML reviews, coding style, posting hints) 813 Thomas Gleixner (LKML reviews, coding style, posting hints)
661 Andrey Volkov (kernel subtree structure, ioctls, mscan driver) 814 Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
662 Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003) 815 Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
663 Klaus Hitschler (PEAK driver integration) 816 Klaus Hitschler (PEAK driver integration)
664 Uwe Koppe (CAN netdevices with PF_PACKET approach) 817 Uwe Koppe (CAN netdevices with PF_PACKET approach)
665 Michael Schulze (driver layer loopback requirement, RT CAN drivers review) 818 Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
819 Pavel Pisa (Bit-timing calculation)
820 Sascha Hauer (SJA1000 platform driver)
821 Sebastian Haas (SJA1000 EMS PCI driver)
822 Markus Plessing (SJA1000 EMS PCI driver)
823 Per Dalen (SJA1000 Kvaser PCI driver)
824 Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 7a3bb1abb830..b132e4a3cf0f 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -141,7 +141,8 @@ rx_ccid = 2
141 Default CCID for the receiver-sender half-connection; see tx_ccid. 141 Default CCID for the receiver-sender half-connection; see tx_ccid.
142 142
143seq_window = 100 143seq_window = 100
144 The initial sequence window (sec. 7.5.2). 144 The initial sequence window (sec. 7.5.2) of the sender. This influences
145 the local ackno validity and the remote seqno validity windows (7.5.1).
145 146
146tx_qlen = 5 147tx_qlen = 5
147 The size of the transmit buffer in packets. A value of 0 corresponds 148 The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt
index 65df3dea5561..5552e2e575c5 100644
--- a/Documentation/networking/dm9000.txt
+++ b/Documentation/networking/dm9000.txt
@@ -129,7 +129,7 @@ PHY Link state polling
129---------------------- 129----------------------
130 130
131The driver keeps track of the link state and informs the network core 131The driver keeps track of the link state and informs the network core
132about link (carrier) availablilty. This is managed by several methods 132about link (carrier) availability. This is managed by several methods
133depending on the version of the chip and on which PHY is being used. 133depending on the version of the chip and on which PHY is being used.
134 134
135For the internal PHY, the original (and currently default) method is 135For the internal PHY, the original (and currently default) method is
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
new file mode 100644
index 000000000000..a0280ad2edc9
--- /dev/null
+++ b/Documentation/networking/ieee802154.txt
@@ -0,0 +1,76 @@
1
2 Linux IEEE 802.15.4 implementation
3
4
5Introduction
6============
7
8The Linux-ZigBee project goal is to provide complete implementation
9of IEEE 802.15.4 / ZigBee / 6LoWPAN protocols. IEEE 802.15.4 is a stack
10of protocols for organizing Low-Rate Wireless Personal Area Networks.
11
12Currently only IEEE 802.15.4 layer is implemented. We have choosen
13to use plain Berkeley socket API, the generic Linux networking stack
14to transfer IEEE 802.15.4 messages and a special protocol over genetlink
15for configuration/management
16
17
18Socket API
19==========
20
21int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
22.....
23
24The address family, socket addresses etc. are defined in the
25include/net/ieee802154/af_ieee802154.h header or in the special header
26in our userspace package (see either linux-zigbee sourceforge download page
27or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee).
28
29One can use SOCK_RAW for passing raw data towards device xmit function. YMMV.
30
31
32MLME - MAC Level Management
33============================
34
35Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands.
36See the include/net/ieee802154/nl802154.h header. Our userspace tools package
37(see above) provides CLI configuration utility for radio interfaces and simple
38coordinator for IEEE 802.15.4 networks as an example users of MLME protocol.
39
40
41Kernel side
42=============
43
44Like with WiFi, there are several types of devices implementing IEEE 802.15.4.
451) 'HardMAC'. The MAC layer is implemented in the device itself, the device
46 exports MLME and data API.
472) 'SoftMAC' or just radio. These types of devices are just radio transceivers
48 possibly with some kinds of acceleration like automatic CRC computation and
49 comparation, automagic ACK handling, address matching, etc.
50
51Those types of devices require different approach to be hooked into Linux kernel.
52
53
54HardMAC
55=======
56
57See the header include/net/ieee802154/netdevice.h. You have to implement Linux
58net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family
59code via plain sk_buffs. The control block of sk_buffs will contain additional
60info as described in the struct ieee802154_mac_cb.
61
62To hook the MLME interface you have to populate the ml_priv field of your
63net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are
64required.
65
66We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c
67
68
69SoftMAC
70=======
71
72We are going to provide intermediate layer impelementing IEEE 802.15.4 MAC
73in software. This is currently WIP.
74
75See header include/net/ieee802154/mac802154.h and several drivers in
76drivers/ieee802154/
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index c7712787933c..8be76235fe67 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -2,7 +2,7 @@
2 2
3ip_forward - BOOLEAN 3ip_forward - BOOLEAN
4 0 - disabled (default) 4 0 - disabled (default)
5 not 0 - enabled 5 not 0 - enabled
6 6
7 Forward Packets between interfaces. 7 Forward Packets between interfaces.
8 8
@@ -36,49 +36,49 @@ rt_cache_rebuild_count - INTEGER
36IP Fragmentation: 36IP Fragmentation:
37 37
38ipfrag_high_thresh - INTEGER 38ipfrag_high_thresh - INTEGER
39 Maximum memory used to reassemble IP fragments. When 39 Maximum memory used to reassemble IP fragments. When
40 ipfrag_high_thresh bytes of memory is allocated for this purpose, 40 ipfrag_high_thresh bytes of memory is allocated for this purpose,
41 the fragment handler will toss packets until ipfrag_low_thresh 41 the fragment handler will toss packets until ipfrag_low_thresh
42 is reached. 42 is reached.
43 43
44ipfrag_low_thresh - INTEGER 44ipfrag_low_thresh - INTEGER
45 See ipfrag_high_thresh 45 See ipfrag_high_thresh
46 46
47ipfrag_time - INTEGER 47ipfrag_time - INTEGER
48 Time in seconds to keep an IP fragment in memory. 48 Time in seconds to keep an IP fragment in memory.
49 49
50ipfrag_secret_interval - INTEGER 50ipfrag_secret_interval - INTEGER
51 Regeneration interval (in seconds) of the hash secret (or lifetime 51 Regeneration interval (in seconds) of the hash secret (or lifetime
52 for the hash secret) for IP fragments. 52 for the hash secret) for IP fragments.
53 Default: 600 53 Default: 600
54 54
55ipfrag_max_dist - INTEGER 55ipfrag_max_dist - INTEGER
56 ipfrag_max_dist is a non-negative integer value which defines the 56 ipfrag_max_dist is a non-negative integer value which defines the
57 maximum "disorder" which is allowed among fragments which share a 57 maximum "disorder" which is allowed among fragments which share a
58 common IP source address. Note that reordering of packets is 58 common IP source address. Note that reordering of packets is
59 not unusual, but if a large number of fragments arrive from a source 59 not unusual, but if a large number of fragments arrive from a source
60 IP address while a particular fragment queue remains incomplete, it 60 IP address while a particular fragment queue remains incomplete, it
61 probably indicates that one or more fragments belonging to that queue 61 probably indicates that one or more fragments belonging to that queue
62 have been lost. When ipfrag_max_dist is positive, an additional check 62 have been lost. When ipfrag_max_dist is positive, an additional check
63 is done on fragments before they are added to a reassembly queue - if 63 is done on fragments before they are added to a reassembly queue - if
64 ipfrag_max_dist (or more) fragments have arrived from a particular IP 64 ipfrag_max_dist (or more) fragments have arrived from a particular IP
65 address between additions to any IP fragment queue using that source 65 address between additions to any IP fragment queue using that source
66 address, it's presumed that one or more fragments in the queue are 66 address, it's presumed that one or more fragments in the queue are
67 lost. The existing fragment queue will be dropped, and a new one 67 lost. The existing fragment queue will be dropped, and a new one
68 started. An ipfrag_max_dist value of zero disables this check. 68 started. An ipfrag_max_dist value of zero disables this check.
69 69
70 Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can 70 Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
71 result in unnecessarily dropping fragment queues when normal 71 result in unnecessarily dropping fragment queues when normal
72 reordering of packets occurs, which could lead to poor application 72 reordering of packets occurs, which could lead to poor application
73 performance. Using a very large value, e.g. 50000, increases the 73 performance. Using a very large value, e.g. 50000, increases the
74 likelihood of incorrectly reassembling IP fragments that originate 74 likelihood of incorrectly reassembling IP fragments that originate
75 from different IP datagrams, which could result in data corruption. 75 from different IP datagrams, which could result in data corruption.
76 Default: 64 76 Default: 64
77 77
78INET peer storage: 78INET peer storage:
79 79
80inet_peer_threshold - INTEGER 80inet_peer_threshold - INTEGER
81 The approximate size of the storage. Starting from this threshold 81 The approximate size of the storage. Starting from this threshold
82 entries will be thrown aggressively. This threshold also determines 82 entries will be thrown aggressively. This threshold also determines
83 entries' time-to-live and time intervals between garbage collection 83 entries' time-to-live and time intervals between garbage collection
84 passes. More entries, less time-to-live, less GC interval. 84 passes. More entries, less time-to-live, less GC interval.
@@ -105,7 +105,7 @@ inet_peer_gc_maxtime - INTEGER
105 in effect under low (or absent) memory pressure on the pool. 105 in effect under low (or absent) memory pressure on the pool.
106 Measured in seconds. 106 Measured in seconds.
107 107
108TCP variables: 108TCP variables:
109 109
110somaxconn - INTEGER 110somaxconn - INTEGER
111 Limit of socket listen() backlog, known in userspace as SOMAXCONN. 111 Limit of socket listen() backlog, known in userspace as SOMAXCONN.
@@ -168,7 +168,16 @@ tcp_dsack - BOOLEAN
168 Allows TCP to send "duplicate" SACKs. 168 Allows TCP to send "duplicate" SACKs.
169 169
170tcp_ecn - BOOLEAN 170tcp_ecn - BOOLEAN
171 Enable Explicit Congestion Notification in TCP. 171 Enable Explicit Congestion Notification (ECN) in TCP. ECN is only
172 used when both ends of the TCP flow support it. It is useful to
173 avoid losses due to congestion (when the bottleneck router supports
174 ECN).
175 Possible values are:
176 0 disable ECN
177 1 ECN enabled
178 2 Only server-side ECN enabled. If the other end does
179 not support ECN, behavior is like with ECN disabled.
180 Default: 2
172 181
173tcp_fack - BOOLEAN 182tcp_fack - BOOLEAN
174 Enable FACK congestion avoidance and fast retransmission. 183 Enable FACK congestion avoidance and fast retransmission.
@@ -310,7 +319,7 @@ tcp_orphan_retries - INTEGER
310 319
311tcp_reordering - INTEGER 320tcp_reordering - INTEGER
312 Maximal reordering of packets in a TCP stream. 321 Maximal reordering of packets in a TCP stream.
313 Default: 3 322 Default: 3
314 323
315tcp_retrans_collapse - BOOLEAN 324tcp_retrans_collapse - BOOLEAN
316 Bug-to-bug compatibility with some broken printers. 325 Bug-to-bug compatibility with some broken printers.
@@ -521,7 +530,7 @@ IP Variables:
521 530
522ip_local_port_range - 2 INTEGERS 531ip_local_port_range - 2 INTEGERS
523 Defines the local port range that is used by TCP and UDP to 532 Defines the local port range that is used by TCP and UDP to
524 choose the local port. The first number is the first, the 533 choose the local port. The first number is the first, the
525 second the last local port number. Default value depends on 534 second the last local port number. Default value depends on
526 amount of memory available on the system: 535 amount of memory available on the system:
527 > 128Mb 32768-61000 536 > 128Mb 32768-61000
@@ -594,12 +603,12 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN
594 603
595 If zero, icmp error messages are sent with the primary address of 604 If zero, icmp error messages are sent with the primary address of
596 the exiting interface. 605 the exiting interface.
597 606
598 If non-zero, the message will be sent with the primary address of 607 If non-zero, the message will be sent with the primary address of
599 the interface that received the packet that caused the icmp error. 608 the interface that received the packet that caused the icmp error.
600 This is the behaviour network many administrators will expect from 609 This is the behaviour network many administrators will expect from
601 a router. And it can make debugging complicated network layouts 610 a router. And it can make debugging complicated network layouts
602 much easier. 611 much easier.
603 612
604 Note that if no primary address exists for the interface selected, 613 Note that if no primary address exists for the interface selected,
605 then the primary address of the first non-loopback interface that 614 then the primary address of the first non-loopback interface that
@@ -611,7 +620,7 @@ igmp_max_memberships - INTEGER
611 Change the maximum number of multicast groups we can subscribe to. 620 Change the maximum number of multicast groups we can subscribe to.
612 Default: 20 621 Default: 20
613 622
614conf/interface/* changes special settings per interface (where "interface" is 623conf/interface/* changes special settings per interface (where "interface" is
615 the name of your network interface) 624 the name of your network interface)
616conf/all/* is special, changes the settings for all interfaces 625conf/all/* is special, changes the settings for all interfaces
617 626
@@ -625,11 +634,11 @@ log_martians - BOOLEAN
625accept_redirects - BOOLEAN 634accept_redirects - BOOLEAN
626 Accept ICMP redirect messages. 635 Accept ICMP redirect messages.
627 accept_redirects for the interface will be enabled if: 636 accept_redirects for the interface will be enabled if:
628 - both conf/{all,interface}/accept_redirects are TRUE in the case forwarding 637 - both conf/{all,interface}/accept_redirects are TRUE in the case
629 for the interface is enabled 638 forwarding for the interface is enabled
630 or 639 or
631 - at least one of conf/{all,interface}/accept_redirects is TRUE in the case 640 - at least one of conf/{all,interface}/accept_redirects is TRUE in the
632 forwarding for the interface is disabled 641 case forwarding for the interface is disabled
633 accept_redirects for the interface will be disabled otherwise 642 accept_redirects for the interface will be disabled otherwise
634 default TRUE (host) 643 default TRUE (host)
635 FALSE (router) 644 FALSE (router)
@@ -640,8 +649,8 @@ forwarding - BOOLEAN
640mc_forwarding - BOOLEAN 649mc_forwarding - BOOLEAN
641 Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE 650 Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
642 and a multicast routing daemon is required. 651 and a multicast routing daemon is required.
643 conf/all/mc_forwarding must also be set to TRUE to enable multicast routing 652 conf/all/mc_forwarding must also be set to TRUE to enable multicast
644 for the interface 653 routing for the interface
645 654
646medium_id - INTEGER 655medium_id - INTEGER
647 Integer value used to differentiate the devices by the medium they 656 Integer value used to differentiate the devices by the medium they
@@ -649,7 +658,7 @@ medium_id - INTEGER
649 the broadcast packets are received only on one of them. 658 the broadcast packets are received only on one of them.
650 The default value 0 means that the device is the only interface 659 The default value 0 means that the device is the only interface
651 to its medium, value of -1 means that medium is not known. 660 to its medium, value of -1 means that medium is not known.
652 661
653 Currently, it is used to change the proxy_arp behavior: 662 Currently, it is used to change the proxy_arp behavior:
654 the proxy_arp feature is enabled for packets forwarded between 663 the proxy_arp feature is enabled for packets forwarded between
655 two devices attached to different media. 664 two devices attached to different media.
@@ -699,16 +708,22 @@ accept_source_route - BOOLEAN
699 default TRUE (router) 708 default TRUE (router)
700 FALSE (host) 709 FALSE (host)
701 710
702rp_filter - BOOLEAN 711rp_filter - INTEGER
703 1 - do source validation by reversed path, as specified in RFC1812
704 Recommended option for single homed hosts and stub network
705 routers. Could cause troubles for complicated (not loop free)
706 networks running a slow unreliable protocol (sort of RIP),
707 or using static routes.
708
709 0 - No source validation. 712 0 - No source validation.
710 713 1 - Strict mode as defined in RFC3704 Strict Reverse Path
711 conf/all/rp_filter must also be set to TRUE to do source validation 714 Each incoming packet is tested against the FIB and if the interface
715 is not the best reverse path the packet check will fail.
716 By default failed packets are discarded.
717 2 - Loose mode as defined in RFC3704 Loose Reverse Path
718 Each incoming packet's source address is also tested against the FIB
719 and if the source address is not reachable via any interface
720 the packet check will fail.
721
722 Current recommended practice in RFC3704 is to enable strict mode
723 to prevent IP spoofing from DDos attacks. If using asymmetric routing
724 or other complicated routing, then loose mode is recommended.
725
726 conf/all/rp_filter must also be set to non-zero to do source validation
712 on the interface 727 on the interface
713 728
714 Default value is 0. Note that some distributions enable it 729 Default value is 0. Note that some distributions enable it
@@ -782,6 +797,12 @@ arp_ignore - INTEGER
782 The max value from conf/{all,interface}/arp_ignore is used 797 The max value from conf/{all,interface}/arp_ignore is used
783 when ARP request is received on the {interface} 798 when ARP request is received on the {interface}
784 799
800arp_notify - BOOLEAN
801 Define mode for notification of address and device changes.
802 0 - (default): do nothing
803 1 - Generate gratuitous arp replies when device is brought up
804 or hardware address changes.
805
785arp_accept - BOOLEAN 806arp_accept - BOOLEAN
786 Define behavior when gratuitous arp replies are received: 807 Define behavior when gratuitous arp replies are received:
787 0 - drop gratuitous arp frames 808 0 - drop gratuitous arp frames
@@ -823,7 +844,7 @@ apply to IPv6 [XXX?].
823 844
824bindv6only - BOOLEAN 845bindv6only - BOOLEAN
825 Default value for IPV6_V6ONLY socket option, 846 Default value for IPV6_V6ONLY socket option,
826 which restricts use of the IPv6 socket to IPv6 communication 847 which restricts use of the IPv6 socket to IPv6 communication
827 only. 848 only.
828 TRUE: disable IPv4-mapped address feature 849 TRUE: disable IPv4-mapped address feature
829 FALSE: enable IPv4-mapped address feature 850 FALSE: enable IPv4-mapped address feature
@@ -833,19 +854,19 @@ bindv6only - BOOLEAN
833IPv6 Fragmentation: 854IPv6 Fragmentation:
834 855
835ip6frag_high_thresh - INTEGER 856ip6frag_high_thresh - INTEGER
836 Maximum memory used to reassemble IPv6 fragments. When 857 Maximum memory used to reassemble IPv6 fragments. When
837 ip6frag_high_thresh bytes of memory is allocated for this purpose, 858 ip6frag_high_thresh bytes of memory is allocated for this purpose,
838 the fragment handler will toss packets until ip6frag_low_thresh 859 the fragment handler will toss packets until ip6frag_low_thresh
839 is reached. 860 is reached.
840 861
841ip6frag_low_thresh - INTEGER 862ip6frag_low_thresh - INTEGER
842 See ip6frag_high_thresh 863 See ip6frag_high_thresh
843 864
844ip6frag_time - INTEGER 865ip6frag_time - INTEGER
845 Time in seconds to keep an IPv6 fragment in memory. 866 Time in seconds to keep an IPv6 fragment in memory.
846 867
847ip6frag_secret_interval - INTEGER 868ip6frag_secret_interval - INTEGER
848 Regeneration interval (in seconds) of the hash secret (or lifetime 869 Regeneration interval (in seconds) of the hash secret (or lifetime
849 for the hash secret) for IPv6 fragments. 870 for the hash secret) for IPv6 fragments.
850 Default: 600 871 Default: 600
851 872
@@ -854,17 +875,17 @@ conf/default/*:
854 875
855 876
856conf/all/*: 877conf/all/*:
857 Change all the interface-specific settings. 878 Change all the interface-specific settings.
858 879
859 [XXX: Other special features than forwarding?] 880 [XXX: Other special features than forwarding?]
860 881
861conf/all/forwarding - BOOLEAN 882conf/all/forwarding - BOOLEAN
862 Enable global IPv6 forwarding between all interfaces. 883 Enable global IPv6 forwarding between all interfaces.
863 884
864 IPv4 and IPv6 work differently here; e.g. netfilter must be used 885 IPv4 and IPv6 work differently here; e.g. netfilter must be used
865 to control which interfaces may forward packets and which not. 886 to control which interfaces may forward packets and which not.
866 887
867 This also sets all interfaces' Host/Router setting 888 This also sets all interfaces' Host/Router setting
868 'forwarding' to the specified value. See below for details. 889 'forwarding' to the specified value. See below for details.
869 890
870 This referred to as global forwarding. 891 This referred to as global forwarding.
@@ -875,12 +896,12 @@ proxy_ndp - BOOLEAN
875conf/interface/*: 896conf/interface/*:
876 Change special settings per interface. 897 Change special settings per interface.
877 898
878 The functional behaviour for certain settings is different 899 The functional behaviour for certain settings is different
879 depending on whether local forwarding is enabled or not. 900 depending on whether local forwarding is enabled or not.
880 901
881accept_ra - BOOLEAN 902accept_ra - BOOLEAN
882 Accept Router Advertisements; autoconfigure using them. 903 Accept Router Advertisements; autoconfigure using them.
883 904
884 Functional default: enabled if local forwarding is disabled. 905 Functional default: enabled if local forwarding is disabled.
885 disabled if local forwarding is enabled. 906 disabled if local forwarding is enabled.
886 907
@@ -926,7 +947,7 @@ accept_source_route - INTEGER
926 Default: 0 947 Default: 0
927 948
928autoconf - BOOLEAN 949autoconf - BOOLEAN
929 Autoconfigure addresses using Prefix Information in Router 950 Autoconfigure addresses using Prefix Information in Router
930 Advertisements. 951 Advertisements.
931 952
932 Functional default: enabled if accept_ra_pinfo is enabled. 953 Functional default: enabled if accept_ra_pinfo is enabled.
@@ -935,11 +956,11 @@ autoconf - BOOLEAN
935dad_transmits - INTEGER 956dad_transmits - INTEGER
936 The amount of Duplicate Address Detection probes to send. 957 The amount of Duplicate Address Detection probes to send.
937 Default: 1 958 Default: 1
938 959
939forwarding - BOOLEAN 960forwarding - BOOLEAN
940 Configure interface-specific Host/Router behaviour. 961 Configure interface-specific Host/Router behaviour.
941 962
942 Note: It is recommended to have the same setting on all 963 Note: It is recommended to have the same setting on all
943 interfaces; mixed router/host scenarios are rather uncommon. 964 interfaces; mixed router/host scenarios are rather uncommon.
944 965
945 FALSE: 966 FALSE:
@@ -948,13 +969,13 @@ forwarding - BOOLEAN
948 969
949 1. IsRouter flag is not set in Neighbour Advertisements. 970 1. IsRouter flag is not set in Neighbour Advertisements.
950 2. Router Solicitations are being sent when necessary. 971 2. Router Solicitations are being sent when necessary.
951 3. If accept_ra is TRUE (default), accept Router 972 3. If accept_ra is TRUE (default), accept Router
952 Advertisements (and do autoconfiguration). 973 Advertisements (and do autoconfiguration).
953 4. If accept_redirects is TRUE (default), accept Redirects. 974 4. If accept_redirects is TRUE (default), accept Redirects.
954 975
955 TRUE: 976 TRUE:
956 977
957 If local forwarding is enabled, Router behaviour is assumed. 978 If local forwarding is enabled, Router behaviour is assumed.
958 This means exactly the reverse from the above: 979 This means exactly the reverse from the above:
959 980
960 1. IsRouter flag is set in Neighbour Advertisements. 981 1. IsRouter flag is set in Neighbour Advertisements.
@@ -989,7 +1010,7 @@ router_solicitation_interval - INTEGER
989 Default: 4 1010 Default: 4
990 1011
991router_solicitations - INTEGER 1012router_solicitations - INTEGER
992 Number of Router Solicitations to send until assuming no 1013 Number of Router Solicitations to send until assuming no
993 routers are present. 1014 routers are present.
994 Default: 3 1015 Default: 3
995 1016
@@ -1013,11 +1034,11 @@ temp_prefered_lft - INTEGER
1013 1034
1014max_desync_factor - INTEGER 1035max_desync_factor - INTEGER
1015 Maximum value for DESYNC_FACTOR, which is a random value 1036 Maximum value for DESYNC_FACTOR, which is a random value
1016 that ensures that clients don't synchronize with each 1037 that ensures that clients don't synchronize with each
1017 other and generate new addresses at exactly the same time. 1038 other and generate new addresses at exactly the same time.
1018 value is in seconds. 1039 value is in seconds.
1019 Default: 600 1040 Default: 600
1020 1041
1021regen_max_retry - INTEGER 1042regen_max_retry - INTEGER
1022 Number of attempts before give up attempting to generate 1043 Number of attempts before give up attempting to generate
1023 valid temporary addresses. 1044 valid temporary addresses.
@@ -1025,15 +1046,24 @@ regen_max_retry - INTEGER
1025 1046
1026max_addresses - INTEGER 1047max_addresses - INTEGER
1027 Number of maximum addresses per interface. 0 disables limitation. 1048 Number of maximum addresses per interface. 0 disables limitation.
1028 It is recommended not set too large value (or 0) because it would 1049 It is recommended not set too large value (or 0) because it would
1029 be too easy way to crash kernel to allow to create too much of 1050 be too easy way to crash kernel to allow to create too much of
1030 autoconfigured addresses. 1051 autoconfigured addresses.
1031 Default: 16 1052 Default: 16
1032 1053
1033disable_ipv6 - BOOLEAN 1054disable_ipv6 - BOOLEAN
1034 Disable IPv6 operation. 1055 Disable IPv6 operation. If accept_dad is set to 2, this value
1056 will be dynamically set to TRUE if DAD fails for the link-local
1057 address.
1035 Default: FALSE (enable IPv6 operation) 1058 Default: FALSE (enable IPv6 operation)
1036 1059
1060 When this value is changed from 1 to 0 (IPv6 is being enabled),
1061 it will dynamically create a link-local address on the given
1062 interface and start Duplicate Address Detection, if necessary.
1063
1064 When this value is changed from 0 to 1 (IPv6 is being disabled),
1065 it will dynamically delete all address on the given interface.
1066
1037accept_dad - INTEGER 1067accept_dad - INTEGER
1038 Whether to accept DAD (Duplicate Address Detection). 1068 Whether to accept DAD (Duplicate Address Detection).
1039 0: Disable DAD 1069 0: Disable DAD
@@ -1252,13 +1282,22 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
1252sctp_wmem - vector of 3 INTEGERs: min, default, max 1282sctp_wmem - vector of 3 INTEGERs: min, default, max
1253 See tcp_wmem for a description. 1283 See tcp_wmem for a description.
1254 1284
1255UNDOCUMENTED:
1256 1285
1257/proc/sys/net/core/* 1286/proc/sys/net/core/*
1258 dev_weight FIXME 1287dev_weight - INTEGER
1288 The maximum number of packets that kernel can handle on a NAPI
1289 interrupt, it's a Per-CPU variable.
1290
1291 Default: 64
1259 1292
1260/proc/sys/net/unix/* 1293/proc/sys/net/unix/*
1261 max_dgram_qlen FIXME 1294max_dgram_qlen - INTEGER
1295 The maximum length of dgram socket receive queue
1296
1297 Default: 10
1298
1299
1300UNDOCUMENTED:
1262 1301
1263/proc/sys/net/irda/* 1302/proc/sys/net/irda/*
1264 fast_poll_increase FIXME 1303 fast_poll_increase FIXME
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
index 268e5c103dd8..9fd7e21296c8 100644
--- a/Documentation/networking/ipv6.txt
+++ b/Documentation/networking/ipv6.txt
@@ -33,3 +33,40 @@ disable
33 33
34 A reboot is required to enable IPv6. 34 A reboot is required to enable IPv6.
35 35
36autoconf
37
38 Specifies whether to enable IPv6 address autoconfiguration
39 on all interfaces. This might be used when one does not wish
40 for addresses to be automatically generated from prefixes
41 received in Router Advertisements.
42
43 The possible values and their effects are:
44
45 0
46 IPv6 address autoconfiguration is disabled on all interfaces.
47
48 Only the IPv6 loopback address (::1) and link-local addresses
49 will be added to interfaces.
50
51 1
52 IPv6 address autoconfiguration is enabled on all interfaces.
53
54 This is the default value.
55
56disable_ipv6
57
58 Specifies whether to disable IPv6 on all interfaces.
59 This might be used when no IPv6 addresses are desired.
60
61 The possible values and their effects are:
62
63 0
64 IPv6 is enabled on all interfaces.
65
66 This is the default value.
67
68 1
69 IPv6 is disabled on all interfaces.
70
71 No IPv6 addresses will be added to interfaces.
72
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt
new file mode 100644
index 000000000000..eeb68685c788
--- /dev/null
+++ b/Documentation/networking/ixgbe.txt
@@ -0,0 +1,199 @@
1Linux Base Driver for 10 Gigabit PCI Express Intel(R) Network Connection
2========================================================================
3
4March 10, 2009
5
6
7Contents
8========
9
10- In This Release
11- Identifying Your Adapter
12- Building and Installation
13- Additional Configurations
14- Support
15
16
17
18In This Release
19===============
20
21This file describes the ixgbe Linux Base Driver for the 10 Gigabit PCI
22Express Intel(R) Network Connection. This driver includes support for
23Itanium(R)2-based systems.
24
25For questions related to hardware requirements, refer to the documentation
26supplied with your 10 Gigabit adapter. All hardware requirements listed apply
27to use with Linux.
28
29The following features are available in this kernel:
30 - Native VLANs
31 - Channel Bonding (teaming)
32 - SNMP
33 - Generic Receive Offload
34 - Data Center Bridging
35
36Channel Bonding documentation can be found in the Linux kernel source:
37/Documentation/networking/bonding.txt
38
39Ethtool, lspci, and ifconfig can be used to display device and driver
40specific information.
41
42
43Identifying Your Adapter
44========================
45
46This driver supports devices based on the 82598 controller and the 82599
47controller.
48
49For specific information on identifying which adapter you have, please visit:
50
51 http://support.intel.com/support/network/sb/CS-008441.htm
52
53
54Building and Installation
55=========================
56
57select m for "Intel(R) 10GbE PCI Express adapters support" located at:
58 Location:
59 -> Device Drivers
60 -> Network device support (NETDEVICES [=y])
61 -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
62
631. make modules & make modules_install
64
652. Load the module:
66
67# modprobe ixgbe
68
69 The insmod command can be used if the full
70 path to the driver module is specified. For example:
71
72 insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgbe/ixgbe.ko
73
74 With 2.6 based kernels also make sure that older ixgbe drivers are
75 removed from the kernel, before loading the new module:
76
77 rmmod ixgbe; modprobe ixgbe
78
793. Assign an IP address to the interface by entering the following, where
80 x is the interface number:
81
82 ifconfig ethx <IP_address>
83
844. Verify that the interface works. Enter the following, where <IP_address>
85 is the IP address for another machine on the same subnet as the interface
86 that is being tested:
87
88 ping <IP_address>
89
90
91Additional Configurations
92=========================
93
94 Viewing Link Messages
95 ---------------------
96 Link messages will not be displayed to the console if the distribution is
97 restricting system messages. In order to see network driver link messages on
98 your console, set dmesg to eight by entering the following:
99
100 dmesg -n 8
101
102 NOTE: This setting is not saved across reboots.
103
104
105 Jumbo Frames
106 ------------
107 The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
108 enabled by changing the MTU to a value larger than the default of 1500.
109 The maximum value for the MTU is 16110. Use the ifconfig command to
110 increase the MTU size. For example:
111
112 ifconfig ethx mtu 9000 up
113
114 The maximum MTU setting for Jumbo Frames is 16110. This value coincides
115 with the maximum Jumbo Frames size of 16128.
116
117 Generic Receive Offload, aka GRO
118 --------------------------------
119 The driver supports the in-kernel software implementation of GRO. GRO has
120 shown that by coalescing Rx traffic into larger chunks of data, CPU
121 utilization can be significantly reduced when under large Rx load. GRO is an
122 evolution of the previously-used LRO interface. GRO is able to coalesce
123 other protocols besides TCP. It's also safe to use with configurations that
124 are problematic for LRO, namely bridging and iSCSI.
125
126 GRO is enabled by default in the driver. Future versions of ethtool will
127 support disabling and re-enabling GRO on the fly.
128
129
130 Data Center Bridging, aka DCB
131 -----------------------------
132
133 DCB is a configuration Quality of Service implementation in hardware.
134 It uses the VLAN priority tag (802.1p) to filter traffic. That means
135 that there are 8 different priorities that traffic can be filtered into.
136 It also enables priority flow control which can limit or eliminate the
137 number of dropped packets during network stress. Bandwidth can be
138 allocated to each of these priorities, which is enforced at the hardware
139 level.
140
141 To enable DCB support in ixgbe, you must enable the DCB netlink layer to
142 allow the userspace tools (see below) to communicate with the driver.
143 This can be found in the kernel configuration here:
144
145 -> Networking support
146 -> Networking options
147 -> Data Center Bridging support
148
149 Once this is selected, DCB support must be selected for ixgbe. This can
150 be found here:
151
152 -> Device Drivers
153 -> Network device support (NETDEVICES [=y])
154 -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
155 -> Intel(R) 10GbE PCI Express adapters support
156 -> Data Center Bridging (DCB) Support
157
158 After these options are selected, you must rebuild your kernel and your
159 modules.
160
161 In order to use DCB, userspace tools must be downloaded and installed.
162 The dcbd tools can be found at:
163
164 http://e1000.sf.net
165
166
167 Ethtool
168 -------
169 The driver utilizes the ethtool interface for driver configuration and
170 diagnostics, as well as displaying statistical information. Ethtool
171 version 3.0 or later is required for this functionality.
172
173 The latest release of ethtool can be found from
174 http://sourceforge.net/projects/gkernel.
175
176
177 NAPI
178 ----
179
180 NAPI (Rx polling mode) is supported in the ixgbe driver. NAPI is enabled
181 by default in the driver.
182
183 See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
184
185
186Support
187=======
188
189For general information, go to the Intel support website at:
190
191 http://support.intel.com
192
193or the Intel Wired Networking project hosted by Sourceforge at:
194
195 http://e1000.sourceforge.net
196
197If an issue is identified with the released source code on the supported
198kernel with a supported adapter, email the specific information related
199to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index 2451f551c505..63214b280e00 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -158,7 +158,7 @@ Sample Userspace Code
158 } 158 }
159 return 0; 159 return 0;
160 160
161Miscellanous 161Miscellaneous
162============ 162============
163 163
164The PPPoL2TP driver was developed as part of the OpenL2TP project by 164The PPPoL2TP driver was developed as part of the OpenL2TP project by
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt
index 84906ef3ed6e..b30e81ad5307 100644
--- a/Documentation/networking/mac80211-injection.txt
+++ b/Documentation/networking/mac80211-injection.txt
@@ -12,38 +12,22 @@ following format:
12The radiotap format is discussed in 12The radiotap format is discussed in
13./Documentation/networking/radiotap-headers.txt. 13./Documentation/networking/radiotap-headers.txt.
14 14
15Despite 13 radiotap argument types are currently defined, most only make sense 15Despite many radiotap parameters being currently defined, most only make sense
16to appear on received packets. The following information is parsed from the 16to appear on received packets. The following information is parsed from the
17radiotap headers and used to control injection: 17radiotap headers and used to control injection:
18 18
19 * IEEE80211_RADIOTAP_RATE
20
21 rate in 500kbps units, automatic if invalid or not present
22
23
24 * IEEE80211_RADIOTAP_ANTENNA
25
26 antenna to use, automatic if not present
27
28
29 * IEEE80211_RADIOTAP_DBM_TX_POWER
30
31 transmit power in dBm, automatic if not present
32
33
34 * IEEE80211_RADIOTAP_FLAGS 19 * IEEE80211_RADIOTAP_FLAGS
35 20
36 IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated 21 IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
37 IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available 22 IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
38 IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the 23 IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
39 current fragmentation threshold. Note that 24 current fragmentation threshold.
40 this flag is only reliable when software 25
41 fragmentation is enabled)
42 26
43The injection code can also skip all other currently defined radiotap fields 27The injection code can also skip all other currently defined radiotap fields
44facilitating replay of captured radiotap headers directly. 28facilitating replay of captured radiotap headers directly.
45 29
46Here is an example valid radiotap header defining these three parameters 30Here is an example valid radiotap header defining some parameters
47 31
48 0x00, 0x00, // <-- radiotap version 32 0x00, 0x00, // <-- radiotap version
49 0x0b, 0x00, // <- radiotap header length 33 0x0b, 0x00, // <- radiotap header length
@@ -72,8 +56,8 @@ interface), along the following lines:
72... 56...
73 r = pcap_inject(ppcap, u8aSendBuffer, nLength); 57 r = pcap_inject(ppcap, u8aSendBuffer, nLength);
74 58
75You can also find sources for a complete inject test applet here: 59You can also find a link to a complete inject application here:
76 60
77http://penumbra.warmcat.com/_twk/tiki-index.php?page=packetspammer 61http://wireless.kernel.org/en/users/Documentation/packetspammer
78 62
79Andy Green <andy@warmcat.com> 63Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index a2ab6a0b116d..87b3d15f523a 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -74,7 +74,7 @@ dev->hard_start_xmit:
74 for this and return NETDEV_TX_LOCKED when the spin lock fails. 74 for this and return NETDEV_TX_LOCKED when the spin lock fails.
75 The locking there should also properly protect against 75 The locking there should also properly protect against
76 set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated. 76 set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
77 Dont use it for new drivers. 77 Don't use it for new drivers.
78 78
79 Context: Process with BHs disabled or BH (timer), 79 Context: Process with BHs disabled or BH (timer),
80 will be called with interrupts disabled by netconsole. 80 will be called with interrupts disabled by netconsole.
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
index c9074f9b78bb..1a77a3cfae54 100644
--- a/Documentation/networking/operstates.txt
+++ b/Documentation/networking/operstates.txt
@@ -38,9 +38,6 @@ ifinfomsg::if_flags & IFF_LOWER_UP:
38ifinfomsg::if_flags & IFF_DORMANT: 38ifinfomsg::if_flags & IFF_DORMANT:
39 Driver has signaled netif_dormant_on() 39 Driver has signaled netif_dormant_on()
40 40
41These interface flags can also be queried without netlink using the
42SIOCGIFFLAGS ioctl.
43
44TLV IFLA_OPERSTATE 41TLV IFLA_OPERSTATE
45 42
46contains RFC2863 state of the interface in numeric representation: 43contains RFC2863 state of the interface in numeric representation:
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 07c53d596035..a22fd85e3796 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -4,16 +4,18 @@
4 4
5This file documents the CONFIG_PACKET_MMAP option available with the PACKET 5This file documents the CONFIG_PACKET_MMAP option available with the PACKET
6socket interface on 2.4 and 2.6 kernels. This type of sockets is used for 6socket interface on 2.4 and 2.6 kernels. This type of sockets is used for
7capture network traffic with utilities like tcpdump or any other that uses 7capture network traffic with utilities like tcpdump or any other that needs
8the libpcap library. 8raw access to network interface.
9
10You can find the latest version of this document at
11 9
10You can find the latest version of this document at:
12 http://pusa.uv.es/~ulisses/packet_mmap/ 11 http://pusa.uv.es/~ulisses/packet_mmap/
13 12
14Please send me your comments to 13Howto can be found at:
14 http://wiki.gnu-log.net (packet_mmap)
15 15
16Please send your comments to
16 Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> 17 Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
18 Johann Baudy <johann.baudy@gnu-log.net>
17 19
18------------------------------------------------------------------------------- 20-------------------------------------------------------------------------------
19+ Why use PACKET_MMAP 21+ Why use PACKET_MMAP
@@ -25,19 +27,24 @@ to capture each packet, it requires two if you want to get packet's
25timestamp (like libpcap always does). 27timestamp (like libpcap always does).
26 28
27In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size 29In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size
28configurable circular buffer mapped in user space. This way reading packets just 30configurable circular buffer mapped in user space that can be used to either
29needs to wait for them, most of the time there is no need to issue a single 31send or receive packets. This way reading packets just needs to wait for them,
30system call. By using a shared buffer between the kernel and the user 32most of the time there is no need to issue a single system call. Concerning
31also has the benefit of minimizing packet copies. 33transmission, multiple packets can be sent through one system call to get the
32 34highest bandwidth.
33It's fine to use PACKET_MMAP to improve the performance of the capture process, 35By using a shared buffer between the kernel and the user also has the benefit
34but it isn't everything. At least, if you are capturing at high speeds (this 36of minimizing packet copies.
35is relative to the cpu speed), you should check if the device driver of your 37
36network interface card supports some sort of interrupt load mitigation or 38It's fine to use PACKET_MMAP to improve the performance of the capture and
37(even better) if it supports NAPI, also make sure it is enabled. 39transmission process, but it isn't everything. At least, if you are capturing
40at high speeds (this is relative to the cpu speed), you should check if the
41device driver of your network interface card supports some sort of interrupt
42load mitigation or (even better) if it supports NAPI, also make sure it is
43enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
44supported by devices of your network.
38 45
39-------------------------------------------------------------------------------- 46--------------------------------------------------------------------------------
40+ How to use CONFIG_PACKET_MMAP 47+ How to use CONFIG_PACKET_MMAP to improve capture process
41-------------------------------------------------------------------------------- 48--------------------------------------------------------------------------------
42 49
43From the user standpoint, you should use the higher level libpcap library, which 50From the user standpoint, you should use the higher level libpcap library, which
@@ -57,7 +64,7 @@ the low level details or want to improve libpcap by including PACKET_MMAP
57support. 64support.
58 65
59-------------------------------------------------------------------------------- 66--------------------------------------------------------------------------------
60+ How to use CONFIG_PACKET_MMAP directly 67+ How to use CONFIG_PACKET_MMAP directly to improve capture process
61-------------------------------------------------------------------------------- 68--------------------------------------------------------------------------------
62 69
63From the system calls stand point, the use of PACKET_MMAP involves 70From the system calls stand point, the use of PACKET_MMAP involves
@@ -66,6 +73,7 @@ the following process:
66 73
67[setup] socket() -------> creation of the capture socket 74[setup] socket() -------> creation of the capture socket
68 setsockopt() ---> allocation of the circular buffer (ring) 75 setsockopt() ---> allocation of the circular buffer (ring)
76 option: PACKET_RX_RING
69 mmap() ---------> mapping of the allocated buffer to the 77 mmap() ---------> mapping of the allocated buffer to the
70 user process 78 user process
71 79
@@ -97,13 +105,75 @@ also the mapping of the circular buffer in the user process and
97the use of this buffer. 105the use of this buffer.
98 106
99-------------------------------------------------------------------------------- 107--------------------------------------------------------------------------------
108+ How to use CONFIG_PACKET_MMAP directly to improve transmission process
109--------------------------------------------------------------------------------
110Transmission process is similar to capture as shown below.
111
112[setup] socket() -------> creation of the transmission socket
113 setsockopt() ---> allocation of the circular buffer (ring)
114 option: PACKET_TX_RING
115 bind() ---------> bind transmission socket with a network interface
116 mmap() ---------> mapping of the allocated buffer to the
117 user process
118
119[transmission] poll() ---------> wait for free packets (optional)
120 send() ---------> send all packets that are set as ready in
121 the ring
122 The flag MSG_DONTWAIT can be used to return
123 before end of transfer.
124
125[shutdown] close() --------> destruction of the transmission socket and
126 deallocation of all associated resources.
127
128Binding the socket to your network interface is mandatory (with zero copy) to
129know the header size of frames used in the circular buffer.
130
131As capture, each frame contains two parts:
132
133 --------------------
134| struct tpacket_hdr | Header. It contains the status of
135| | of this frame
136|--------------------|
137| data buffer |
138. . Data that will be sent over the network interface.
139. .
140 --------------------
141
142 bind() associates the socket to your network interface thanks to
143 sll_ifindex parameter of struct sockaddr_ll.
144
145 Initialization example:
146
147 struct sockaddr_ll my_addr;
148 struct ifreq s_ifr;
149 ...
150
151 strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
152
153 /* get interface index of eth0 */
154 ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
155
156 /* fill sockaddr_ll struct to prepare binding */
157 my_addr.sll_family = AF_PACKET;
158 my_addr.sll_protocol = ETH_P_ALL;
159 my_addr.sll_ifindex = s_ifr.ifr_ifindex;
160
161 /* bind socket to eth0 */
162 bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
163
164 A complete tutorial is available at: http://wiki.gnu-log.net/
165
166--------------------------------------------------------------------------------
100+ PACKET_MMAP settings 167+ PACKET_MMAP settings
101-------------------------------------------------------------------------------- 168--------------------------------------------------------------------------------
102 169
103 170
104To setup PACKET_MMAP from user level code is done with a call like 171To setup PACKET_MMAP from user level code is done with a call like
105 172
173 - Capture process
106 setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req)) 174 setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
175 - Transmission process
176 setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
107 177
108The most significant argument in the previous call is the req parameter, 178The most significant argument in the previous call is the req parameter,
109this parameter must to have the following structure: 179this parameter must to have the following structure:
@@ -117,11 +187,11 @@ this parameter must to have the following structure:
117 }; 187 };
118 188
119This structure is defined in /usr/include/linux/if_packet.h and establishes a 189This structure is defined in /usr/include/linux/if_packet.h and establishes a
120circular buffer (ring) of unswappable memory mapped in the capture process. 190circular buffer (ring) of unswappable memory.
121Being mapped in the capture process allows reading the captured frames and 191Being mapped in the capture process allows reading the captured frames and
122related meta-information like timestamps without requiring a system call. 192related meta-information like timestamps without requiring a system call.
123 193
124Captured frames are grouped in blocks. Each block is a physically contiguous 194Frames are grouped in blocks. Each block is a physically contiguous
125region of memory and holds tp_block_size/tp_frame_size frames. The total number 195region of memory and holds tp_block_size/tp_frame_size frames. The total number
126of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because 196of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because
127 197
@@ -336,6 +406,7 @@ struct tpacket_hdr). If this field is 0 means that the frame is ready
336to be used for the kernel, If not, there is a frame the user can read 406to be used for the kernel, If not, there is a frame the user can read
337and the following flags apply: 407and the following flags apply:
338 408
409+++ Capture process:
339 from include/linux/if_packet.h 410 from include/linux/if_packet.h
340 411
341 #define TP_STATUS_COPY 2 412 #define TP_STATUS_COPY 2
@@ -391,6 +462,37 @@ packets are in the ring:
391It doesn't incur in a race condition to first check the status value and 462It doesn't incur in a race condition to first check the status value and
392then poll for frames. 463then poll for frames.
393 464
465
466++ Transmission process
467Those defines are also used for transmission:
468
469 #define TP_STATUS_AVAILABLE 0 // Frame is available
470 #define TP_STATUS_SEND_REQUEST 1 // Frame will be sent on next send()
471 #define TP_STATUS_SENDING 2 // Frame is currently in transmission
472 #define TP_STATUS_WRONG_FORMAT 4 // Frame format is not correct
473
474First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
475packet, the user fills a data buffer of an available frame, sets tp_len to
476current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
477This can be done on multiple frames. Once the user is ready to transmit, it
478calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
479forwarded to the network device. The kernel updates each status of sent
480frames with TP_STATUS_SENDING until the end of transfer.
481At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
482
483 header->tp_len = in_i_size;
484 header->tp_status = TP_STATUS_SEND_REQUEST;
485 retval = send(this->socket, NULL, 0, 0);
486
487The user can also use poll() to check if a buffer is available:
488(status == TP_STATUS_SENDING)
489
490 struct pollfd pfd;
491 pfd.fd = fd;
492 pfd.revents = 0;
493 pfd.events = POLLOUT;
494 retval = poll(&pfd, 1, timeout);
495
394-------------------------------------------------------------------------------- 496--------------------------------------------------------------------------------
395+ THANKS 497+ THANKS
396-------------------------------------------------------------------------------- 498--------------------------------------------------------------------------------
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt
index 6a07e45d4a93..6e8ce09f9c73 100644
--- a/Documentation/networking/phonet.txt
+++ b/Documentation/networking/phonet.txt
@@ -36,7 +36,7 @@ Phonet packets have a common header as follows:
36On Linux, the link-layer header includes the pn_media byte (see below). 36On Linux, the link-layer header includes the pn_media byte (see below).
37The next 7 bytes are part of the network-layer header. 37The next 7 bytes are part of the network-layer header.
38 38
39The device ID is split: the 6 higher-order bits consitute the device 39The device ID is split: the 6 higher-order bits constitute the device
40address, while the 2 lower-order bits are used for multiplexing, as are 40address, while the 2 lower-order bits are used for multiplexing, as are
41the 8-bit object identifiers. As such, Phonet can be considered as a 41the 8-bit object identifiers. As such, Phonet can be considered as a
42network layer with 6 bits of address space and 10 bits for transport 42network layer with 6 bits of address space and 10 bits for transport
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
new file mode 100644
index 000000000000..c67077cbeb80
--- /dev/null
+++ b/Documentation/networking/rds.txt
@@ -0,0 +1,356 @@
1
2Overview
3========
4
5This readme tries to provide some background on the hows and whys of RDS,
6and will hopefully help you find your way around the code.
7
8In addition, please see this email about RDS origins:
9http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
10
11RDS Architecture
12================
13
14RDS provides reliable, ordered datagram delivery by using a single
15reliable connection between any two nodes in the cluster. This allows
16applications to use a single socket to talk to any other process in the
17cluster - so in a cluster with N processes you need N sockets, in contrast
18to N*N if you use a connection-oriented socket transport like TCP.
19
20RDS is not Infiniband-specific; it was designed to support different
21transports. The current implementation used to support RDS over TCP as well
22as IB. Work is in progress to support RDS over iWARP, and using DCE to
23guarantee no dropped packets on Ethernet, it may be possible to use RDS over
24UDP in the future.
25
26The high-level semantics of RDS from the application's point of view are
27
28 * Addressing
29 RDS uses IPv4 addresses and 16bit port numbers to identify
30 the end point of a connection. All socket operations that involve
31 passing addresses between kernel and user space generally
32 use a struct sockaddr_in.
33
34 The fact that IPv4 addresses are used does not mean the underlying
35 transport has to be IP-based. In fact, RDS over IB uses a
36 reliable IB connection; the IP address is used exclusively to
37 locate the remote node's GID (by ARPing for the given IP).
38
39 The port space is entirely independent of UDP, TCP or any other
40 protocol.
41
42 * Socket interface
43 RDS sockets work *mostly* as you would expect from a BSD
44 socket. The next section will cover the details. At any rate,
45 all I/O is performed through the standard BSD socket API.
46 Some additions like zerocopy support are implemented through
47 control messages, while other extensions use the getsockopt/
48 setsockopt calls.
49
50 Sockets must be bound before you can send or receive data.
51 This is needed because binding also selects a transport and
52 attaches it to the socket. Once bound, the transport assignment
53 does not change. RDS will tolerate IPs moving around (eg in
54 a active-active HA scenario), but only as long as the address
55 doesn't move to a different transport.
56
57 * sysctls
58 RDS supports a number of sysctls in /proc/sys/net/rds
59
60
61Socket Interface
62================
63
64 AF_RDS, PF_RDS, SOL_RDS
65 These constants haven't been assigned yet, because RDS isn't in
66 mainline yet. Currently, the kernel module assigns some constant
67 and publishes it to user space through two sysctl files
68 /proc/sys/net/rds/pf_rds
69 /proc/sys/net/rds/sol_rds
70
71 fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
72 This creates a new, unbound RDS socket.
73
74 setsockopt(SOL_SOCKET): send and receive buffer size
75 RDS honors the send and receive buffer size socket options.
76 You are not allowed to queue more than SO_SNDSIZE bytes to
77 a socket. A message is queued when sendmsg is called, and
78 it leaves the queue when the remote system acknowledges
79 its arrival.
80
81 The SO_RCVSIZE option controls the maximum receive queue length.
82 This is a soft limit rather than a hard limit - RDS will
83 continue to accept and queue incoming messages, even if that
84 takes the queue length over the limit. However, it will also
85 mark the port as "congested" and send a congestion update to
86 the source node. The source node is supposed to throttle any
87 processes sending to this congested port.
88
89 bind(fd, &sockaddr_in, ...)
90 This binds the socket to a local IP address and port, and a
91 transport.
92
93 sendmsg(fd, ...)
94 Sends a message to the indicated recipient. The kernel will
95 transparently establish the underlying reliable connection
96 if it isn't up yet.
97
98 An attempt to send a message that exceeds SO_SNDSIZE will
99 return with -EMSGSIZE
100
101 An attempt to send a message that would take the total number
102 of queued bytes over the SO_SNDSIZE threshold will return
103 EAGAIN.
104
105 An attempt to send a message to a destination that is marked
106 as "congested" will return ENOBUFS.
107
108 recvmsg(fd, ...)
109 Receives a message that was queued to this socket. The sockets
110 recv queue accounting is adjusted, and if the queue length
111 drops below SO_SNDSIZE, the port is marked uncongested, and
112 a congestion update is sent to all peers.
113
114 Applications can ask the RDS kernel module to receive
115 notifications via control messages (for instance, there is a
116 notification when a congestion update arrived, or when a RDMA
117 operation completes). These notifications are received through
118 the msg.msg_control buffer of struct msghdr. The format of the
119 messages is described in manpages.
120
121 poll(fd)
122 RDS supports the poll interface to allow the application
123 to implement async I/O.
124
125 POLLIN handling is pretty straightforward. When there's an
126 incoming message queued to the socket, or a pending notification,
127 we signal POLLIN.
128
129 POLLOUT is a little harder. Since you can essentially send
130 to any destination, RDS will always signal POLLOUT as long as
131 there's room on the send queue (ie the number of bytes queued
132 is less than the sendbuf size).
133
134 However, the kernel will refuse to accept messages to
135 a destination marked congested - in this case you will loop
136 forever if you rely on poll to tell you what to do.
137 This isn't a trivial problem, but applications can deal with
138 this - by using congestion notifications, and by checking for
139 ENOBUFS errors returned by sendmsg.
140
141 setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
142 This allows the application to discard all messages queued to a
143 specific destination on this particular socket.
144
145 This allows the application to cancel outstanding messages if
146 it detects a timeout. For instance, if it tried to send a message,
147 and the remote host is unreachable, RDS will keep trying forever.
148 The application may decide it's not worth it, and cancel the
149 operation. In this case, it would use RDS_CANCEL_SENT_TO to
150 nuke any pending messages.
151
152
153RDMA for RDS
154============
155
156 see rds-rdma(7) manpage (available in rds-tools)
157
158
159Congestion Notifications
160========================
161
162 see rds(7) manpage
163
164
165RDS Protocol
166============
167
168 Message header
169
170 The message header is a 'struct rds_header' (see rds.h):
171 Fields:
172 h_sequence:
173 per-packet sequence number
174 h_ack:
175 piggybacked acknowledgment of last packet received
176 h_len:
177 length of data, not including header
178 h_sport:
179 source port
180 h_dport:
181 destination port
182 h_flags:
183 CONG_BITMAP - this is a congestion update bitmap
184 ACK_REQUIRED - receiver must ack this packet
185 RETRANSMITTED - packet has previously been sent
186 h_credit:
187 indicate to other end of connection that
188 it has more credits available (i.e. there is
189 more send room)
190 h_padding[4]:
191 unused, for future use
192 h_csum:
193 header checksum
194 h_exthdr:
195 optional data can be passed here. This is currently used for
196 passing RDMA-related information.
197
198 ACK and retransmit handling
199
200 One might think that with reliable IB connections you wouldn't need
201 to ack messages that have been received. The problem is that IB
202 hardware generates an ack message before it has DMAed the message
203 into memory. This creates a potential message loss if the HCA is
204 disabled for any reason between when it sends the ack and before
205 the message is DMAed and processed. This is only a potential issue
206 if another HCA is available for fail-over.
207
208 Sending an ack immediately would allow the sender to free the sent
209 message from their send queue quickly, but could cause excessive
210 traffic to be used for acks. RDS piggybacks acks on sent data
211 packets. Ack-only packets are reduced by only allowing one to be
212 in flight at a time, and by the sender only asking for acks when
213 its send buffers start to fill up. All retransmissions are also
214 acked.
215
216 Flow Control
217
218 RDS's IB transport uses a credit-based mechanism to verify that
219 there is space in the peer's receive buffers for more data. This
220 eliminates the need for hardware retries on the connection.
221
222 Congestion
223
224 Messages waiting in the receive queue on the receiving socket
225 are accounted against the sockets SO_RCVBUF option value. Only
226 the payload bytes in the message are accounted for. If the
227 number of bytes queued equals or exceeds rcvbuf then the socket
228 is congested. All sends attempted to this socket's address
229 should return block or return -EWOULDBLOCK.
230
231 Applications are expected to be reasonably tuned such that this
232 situation very rarely occurs. An application encountering this
233 "back-pressure" is considered a bug.
234
235 This is implemented by having each node maintain bitmaps which
236 indicate which ports on bound addresses are congested. As the
237 bitmap changes it is sent through all the connections which
238 terminate in the local address of the bitmap which changed.
239
240 The bitmaps are allocated as connections are brought up. This
241 avoids allocation in the interrupt handling path which queues
242 sages on sockets. The dense bitmaps let transports send the
243 entire bitmap on any bitmap change reasonably efficiently. This
244 is much easier to implement than some finer-grained
245 communication of per-port congestion. The sender does a very
246 inexpensive bit test to test if the port it's about to send to
247 is congested or not.
248
249
250RDS Transport Layer
251==================
252
253 As mentioned above, RDS is not IB-specific. Its code is divided
254 into a general RDS layer and a transport layer.
255
256 The general layer handles the socket API, congestion handling,
257 loopback, stats, usermem pinning, and the connection state machine.
258
259 The transport layer handles the details of the transport. The IB
260 transport, for example, handles all the queue pairs, work requests,
261 CM event handlers, and other Infiniband details.
262
263
264RDS Kernel Structures
265=====================
266
267 struct rds_message
268 aka possibly "rds_outgoing", the generic RDS layer copies data to
269 be sent and sets header fields as needed, based on the socket API.
270 This is then queued for the individual connection and sent by the
271 connection's transport.
272 struct rds_incoming
273 a generic struct referring to incoming data that can be handed from
274 the transport to the general code and queued by the general code
275 while the socket is awoken. It is then passed back to the transport
276 code to handle the actual copy-to-user.
277 struct rds_socket
278 per-socket information
279 struct rds_connection
280 per-connection information
281 struct rds_transport
282 pointers to transport-specific functions
283 struct rds_statistics
284 non-transport-specific statistics
285 struct rds_cong_map
286 wraps the raw congestion bitmap, contains rbnode, waitq, etc.
287
288Connection management
289=====================
290
291 Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
292 ERROR states.
293
294 The first time an attempt is made by an RDS socket to send data to
295 a node, a connection is allocated and connected. That connection is
296 then maintained forever -- if there are transport errors, the
297 connection will be dropped and re-established.
298
299 Dropping a connection while packets are queued will cause queued or
300 partially-sent datagrams to be retransmitted when the connection is
301 re-established.
302
303
304The send path
305=============
306
307 rds_sendmsg()
308 struct rds_message built from incoming data
309 CMSGs parsed (e.g. RDMA ops)
310 transport connection alloced and connected if not already
311 rds_message placed on send queue
312 send worker awoken
313 rds_send_worker()
314 calls rds_send_xmit() until queue is empty
315 rds_send_xmit()
316 transmits congestion map if one is pending
317 may set ACK_REQUIRED
318 calls transport to send either non-RDMA or RDMA message
319 (RDMA ops never retransmitted)
320 rds_ib_xmit()
321 allocs work requests from send ring
322 adds any new send credits available to peer (h_credits)
323 maps the rds_message's sg list
324 piggybacks ack
325 populates work requests
326 post send to connection's queue pair
327
328The recv path
329=============
330
331 rds_ib_recv_cq_comp_handler()
332 looks at write completions
333 unmaps recv buffer from device
334 no errors, call rds_ib_process_recv()
335 refill recv ring
336 rds_ib_process_recv()
337 validate header checksum
338 copy header to rds_ib_incoming struct if start of a new datagram
339 add to ibinc's fraglist
340 if competed datagram:
341 update cong map if datagram was cong update
342 call rds_recv_incoming() otherwise
343 note if ack is required
344 rds_recv_incoming()
345 drop duplicate packets
346 respond to pings
347 find the sock associated with this datagram
348 add to sock queue
349 wake up sock
350 do some congestion calculations
351 rds_recvmsg
352 copy data into user iovec
353 handle CMSGs
354 return to application
355
356
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index dcf31648414a..eaa1a25946c1 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -89,7 +89,7 @@ added to this document when its support is enabled.
89Device drivers who provide their own built regulatory domain 89Device drivers who provide their own built regulatory domain
90do not need a callback as the channels registered by them are 90do not need a callback as the channels registered by them are
91the only ones that will be allowed and therefore *additional* 91the only ones that will be allowed and therefore *additional*
92cannels cannot be enabled. 92channels cannot be enabled.
93 93
94Example code - drivers hinting an alpha2: 94Example code - drivers hinting an alpha2:
95------------------------------------------ 95------------------------------------------
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
new file mode 100644
index 000000000000..0e58b4539176
--- /dev/null
+++ b/Documentation/networking/timestamping.txt
@@ -0,0 +1,180 @@
1The existing interfaces for getting network packages time stamped are:
2
3* SO_TIMESTAMP
4 Generate time stamp for each incoming packet using the (not necessarily
5 monotonous!) system time. Result is returned via recv_msg() in a
6 control message as timeval (usec resolution).
7
8* SO_TIMESTAMPNS
9 Same time stamping mechanism as SO_TIMESTAMP, but returns result as
10 timespec (nsec resolution).
11
12* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
13 Only for multicasts: approximate send time stamp by receiving the looped
14 packet and using its receive time stamp.
15
16The following interface complements the existing ones: receive time
17stamps can be generated and returned for arbitrary packets and much
18closer to the point where the packet is really sent. Time stamps can
19be generated in software (as before) or in hardware (if the hardware
20has such a feature).
21
22SO_TIMESTAMPING:
23
24Instructs the socket layer which kind of information is wanted. The
25parameter is an integer with some of the following bits set. Setting
26other bits is an error and doesn't change the current state.
27
28SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware
29SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or
30 fails, then do it in software
31SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp
32 as generated by the hardware
33SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or
34 fails, then do it in software
35SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp
36SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to
37 the system time base
38SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in
39 software
40
41SOF_TIMESTAMPING_TX/RX determine how time stamps are generated.
42SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the
43following control message:
44 struct scm_timestamping {
45 struct timespec systime;
46 struct timespec hwtimetrans;
47 struct timespec hwtimeraw;
48 };
49
50recvmsg() can be used to get this control message for regular incoming
51packets. For send time stamps the outgoing packet is looped back to
52the socket's error queue with the send time stamp(s) attached. It can
53be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the
54original outgoing packet data including all headers preprended down to
55and including the link layer, the scm_timestamping control message and
56a sock_extended_err control message with ee_errno==ENOMSG and
57ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending
58bounced packet is ready for reading as far as select() is concerned.
59If the outgoing packet has to be fragmented, then only the first
60fragment is time stamped and returned to the sending socket.
61
62All three values correspond to the same event in time, but were
63generated in different ways. Each of these values may be empty (= all
64zero), in which case no such value was available. If the application
65is not interested in some of these values, they can be left blank to
66avoid the potential overhead of calculating them.
67
68systime is the value of the system time at that moment. This
69corresponds to the value also returned via SO_TIMESTAMP[NS]. If the
70time stamp was generated by hardware, then this field is
71empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is
72set.
73
74hwtimeraw is the original hardware time stamp. Filled in if
75SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its
76relation to system time should be made.
77
78hwtimetrans is the hardware time stamp transformed so that it
79corresponds as good as possible to system time. This correlation is
80not perfect; as a consequence, sorting packets received via different
81NICs by their hwtimetrans may differ from the order in which they were
82received. hwtimetrans may be non-monotonic even for the same NIC.
83Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support
84by the network device and will be empty without that support.
85
86
87SIOCSHWTSTAMP:
88
89Hardware time stamping must also be initialized for each device driver
90that is expected to do hardware time stamping. The parameter is:
91
92struct hwtstamp_config {
93 int flags; /* no flags defined right now, must be zero */
94 int tx_type; /* HWTSTAMP_TX_* */
95 int rx_filter; /* HWTSTAMP_FILTER_* */
96};
97
98Desired behavior is passed into the kernel and to a specific device by
99calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
100ifr_data points to a struct hwtstamp_config. The tx_type and
101rx_filter are hints to the driver what it is expected to do. If
102the requested fine-grained filtering for incoming packets is not
103supported, the driver may time stamp more than just the requested types
104of packets.
105
106A driver which supports hardware time stamping shall update the struct
107with the actual, possibly more permissive configuration. If the
108requested packets cannot be time stamped, then nothing should be
109changed and ERANGE shall be returned (in contrast to EINVAL, which
110indicates that SIOCSHWTSTAMP is not supported at all).
111
112Only a processes with admin rights may change the configuration. User
113space is responsible to ensure that multiple processes don't interfere
114with each other and that the settings are reset.
115
116/* possible values for hwtstamp_config->tx_type */
117enum {
118 /*
119 * no outgoing packet will need hardware time stamping;
120 * should a packet arrive which asks for it, no hardware
121 * time stamping will be done
122 */
123 HWTSTAMP_TX_OFF,
124
125 /*
126 * enables hardware time stamping for outgoing packets;
127 * the sender of the packet decides which are to be
128 * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
129 * before sending the packet
130 */
131 HWTSTAMP_TX_ON,
132};
133
134/* possible values for hwtstamp_config->rx_filter */
135enum {
136 /* time stamp no incoming packet at all */
137 HWTSTAMP_FILTER_NONE,
138
139 /* time stamp any incoming packet */
140 HWTSTAMP_FILTER_ALL,
141
142 /* return value: time stamp all packets requested plus some others */
143 HWTSTAMP_FILTER_SOME,
144
145 /* PTP v1, UDP, any kind of event packet */
146 HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
147
148 ...
149};
150
151
152DEVICE IMPLEMENTATION
153
154A driver which supports hardware time stamping must support the
155SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored
156in the skb with skb_hwtstamp_set().
157
158Time stamps for outgoing packets are to be generated as follows:
159- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware()
160 returns non-zero. If yes, then the driver is expected
161 to do hardware time stamping.
162- If this is possible for the skb and requested, then declare
163 that the driver is doing the time stamping by calling
164 skb_hwtstamp_tx_in_progress(). A driver not supporting
165 hardware time stamping doesn't do that. A driver must never
166 touch sk_buff::tstamp! It is used to store how time stamping
167 for an outgoing packets is to be done.
168- As soon as the driver has sent the packet and/or obtained a
169 hardware time stamp for it, it passes the time stamp back by
170 calling skb_hwtstamp_tx() with the original skb, the raw
171 hardware time stamp and a handle to the device (necessary
172 to convert the hardware time stamp to system time). If obtaining
173 the hardware time stamp somehow fails, then the driver should
174 not fall back to software time stamping. The rationale is that
175 this would occur at a later time in the processing pipeline
176 than other software time stamping and therefore could lead
177 to unexpected deltas between time stamps.
178- If the driver did not call skb_hwtstamp_tx_in_progress(), then
179 dev_hard_start_xmit() checks whether software time stamping
180 is wanted as fallback and potentially generates the time stamp.
diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore
new file mode 100644
index 000000000000..71e81eb2e22f
--- /dev/null
+++ b/Documentation/networking/timestamping/.gitignore
@@ -0,0 +1 @@
timestamping
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile
new file mode 100644
index 000000000000..2a1489fdc036
--- /dev/null
+++ b/Documentation/networking/timestamping/Makefile
@@ -0,0 +1,6 @@
1CPPFLAGS = -I../../../include
2
3timestamping: timestamping.c
4
5clean:
6 rm -f timestamping
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c
new file mode 100644
index 000000000000..43d143104210
--- /dev/null
+++ b/Documentation/networking/timestamping/timestamping.c
@@ -0,0 +1,533 @@
1/*
2 * This program demonstrates how the various time stamping features in
3 * the Linux kernel work. It emulates the behavior of a PTP
4 * implementation in stand-alone master mode by sending PTPv1 Sync
5 * multicasts once every second. It looks for similar packets, but
6 * beyond that doesn't actually implement PTP.
7 *
8 * Outgoing packets are time stamped with SO_TIMESTAMPING with or
9 * without hardware support.
10 *
11 * Incoming packets are time stamped with SO_TIMESTAMPING with or
12 * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
13 * SO_TIMESTAMP[NS].
14 *
15 * Copyright (C) 2009 Intel Corporation.
16 * Author: Patrick Ohly <patrick.ohly@intel.com>
17 *
18 * This program is free software; you can redistribute it and/or modify it
19 * under the terms and conditions of the GNU General Public License,
20 * version 2, as published by the Free Software Foundation.
21 *
22 * This program is distributed in the hope it will be useful, but WITHOUT
23 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
24 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
25 * more details.
26 *
27 * You should have received a copy of the GNU General Public License along with
28 * this program; if not, write to the Free Software Foundation, Inc.,
29 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <errno.h>
35#include <string.h>
36
37#include <sys/time.h>
38#include <sys/socket.h>
39#include <sys/select.h>
40#include <sys/ioctl.h>
41#include <arpa/inet.h>
42#include <net/if.h>
43
44#include "asm/types.h"
45#include "linux/net_tstamp.h"
46#include "linux/errqueue.h"
47
48#ifndef SO_TIMESTAMPING
49# define SO_TIMESTAMPING 37
50# define SCM_TIMESTAMPING SO_TIMESTAMPING
51#endif
52
53#ifndef SO_TIMESTAMPNS
54# define SO_TIMESTAMPNS 35
55#endif
56
57#ifndef SIOCGSTAMPNS
58# define SIOCGSTAMPNS 0x8907
59#endif
60
61#ifndef SIOCSHWTSTAMP
62# define SIOCSHWTSTAMP 0x89b0
63#endif
64
65static void usage(const char *error)
66{
67 if (error)
68 printf("invalid option: %s\n", error);
69 printf("timestamping interface option*\n\n"
70 "Options:\n"
71 " IP_MULTICAST_LOOP - looping outgoing multicasts\n"
72 " SO_TIMESTAMP - normal software time stamping, ms resolution\n"
73 " SO_TIMESTAMPNS - more accurate software time stamping\n"
74 " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
75 " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
76 " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
77 " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
78 " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
79 " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n"
80 " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
81 " SIOCGSTAMP - check last socket time stamp\n"
82 " SIOCGSTAMPNS - more accurate socket time stamp\n");
83 exit(1);
84}
85
86static void bail(const char *error)
87{
88 printf("%s: %s\n", error, strerror(errno));
89 exit(1);
90}
91
92static const unsigned char sync[] = {
93 0x00, 0x01, 0x00, 0x01,
94 0x5f, 0x44, 0x46, 0x4c,
95 0x54, 0x00, 0x00, 0x00,
96 0x00, 0x00, 0x00, 0x00,
97 0x00, 0x00, 0x00, 0x00,
98 0x01, 0x01,
99
100 /* fake uuid */
101 0x00, 0x01,
102 0x02, 0x03, 0x04, 0x05,
103
104 0x00, 0x01, 0x00, 0x37,
105 0x00, 0x00, 0x00, 0x08,
106 0x00, 0x00, 0x00, 0x00,
107 0x49, 0x05, 0xcd, 0x01,
108 0x29, 0xb1, 0x8d, 0xb0,
109 0x00, 0x00, 0x00, 0x00,
110 0x00, 0x01,
111
112 /* fake uuid */
113 0x00, 0x01,
114 0x02, 0x03, 0x04, 0x05,
115
116 0x00, 0x00, 0x00, 0x37,
117 0x00, 0x00, 0x00, 0x04,
118 0x44, 0x46, 0x4c, 0x54,
119 0x00, 0x00, 0xf0, 0x60,
120 0x00, 0x01, 0x00, 0x00,
121 0x00, 0x00, 0x00, 0x01,
122 0x00, 0x00, 0xf0, 0x60,
123 0x00, 0x00, 0x00, 0x00,
124 0x00, 0x00, 0x00, 0x04,
125 0x44, 0x46, 0x4c, 0x54,
126 0x00, 0x01,
127
128 /* fake uuid */
129 0x00, 0x01,
130 0x02, 0x03, 0x04, 0x05,
131
132 0x00, 0x00, 0x00, 0x00,
133 0x00, 0x00, 0x00, 0x00,
134 0x00, 0x00, 0x00, 0x00,
135 0x00, 0x00, 0x00, 0x00
136};
137
138static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
139{
140 struct timeval now;
141 int res;
142
143 res = sendto(sock, sync, sizeof(sync), 0,
144 addr, addr_len);
145 gettimeofday(&now, 0);
146 if (res < 0)
147 printf("%s: %s\n", "send", strerror(errno));
148 else
149 printf("%ld.%06ld: sent %d bytes\n",
150 (long)now.tv_sec, (long)now.tv_usec,
151 res);
152}
153
154static void printpacket(struct msghdr *msg, int res,
155 char *data,
156 int sock, int recvmsg_flags,
157 int siocgstamp, int siocgstampns)
158{
159 struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
160 struct cmsghdr *cmsg;
161 struct timeval tv;
162 struct timespec ts;
163 struct timeval now;
164
165 gettimeofday(&now, 0);
166
167 printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n",
168 (long)now.tv_sec, (long)now.tv_usec,
169 (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
170 res,
171 inet_ntoa(from_addr->sin_addr),
172 msg->msg_controllen);
173 for (cmsg = CMSG_FIRSTHDR(msg);
174 cmsg;
175 cmsg = CMSG_NXTHDR(msg, cmsg)) {
176 printf(" cmsg len %d: ", cmsg->cmsg_len);
177 switch (cmsg->cmsg_level) {
178 case SOL_SOCKET:
179 printf("SOL_SOCKET ");
180 switch (cmsg->cmsg_type) {
181 case SO_TIMESTAMP: {
182 struct timeval *stamp =
183 (struct timeval *)CMSG_DATA(cmsg);
184 printf("SO_TIMESTAMP %ld.%06ld",
185 (long)stamp->tv_sec,
186 (long)stamp->tv_usec);
187 break;
188 }
189 case SO_TIMESTAMPNS: {
190 struct timespec *stamp =
191 (struct timespec *)CMSG_DATA(cmsg);
192 printf("SO_TIMESTAMPNS %ld.%09ld",
193 (long)stamp->tv_sec,
194 (long)stamp->tv_nsec);
195 break;
196 }
197 case SO_TIMESTAMPING: {
198 struct timespec *stamp =
199 (struct timespec *)CMSG_DATA(cmsg);
200 printf("SO_TIMESTAMPING ");
201 printf("SW %ld.%09ld ",
202 (long)stamp->tv_sec,
203 (long)stamp->tv_nsec);
204 stamp++;
205 printf("HW transformed %ld.%09ld ",
206 (long)stamp->tv_sec,
207 (long)stamp->tv_nsec);
208 stamp++;
209 printf("HW raw %ld.%09ld",
210 (long)stamp->tv_sec,
211 (long)stamp->tv_nsec);
212 break;
213 }
214 default:
215 printf("type %d", cmsg->cmsg_type);
216 break;
217 }
218 break;
219 case IPPROTO_IP:
220 printf("IPPROTO_IP ");
221 switch (cmsg->cmsg_type) {
222 case IP_RECVERR: {
223 struct sock_extended_err *err =
224 (struct sock_extended_err *)CMSG_DATA(cmsg);
225 printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
226 strerror(err->ee_errno),
227 err->ee_origin,
228#ifdef SO_EE_ORIGIN_TIMESTAMPING
229 err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
230 "bounced packet" : "unexpected origin"
231#else
232 "probably SO_EE_ORIGIN_TIMESTAMPING"
233#endif
234 );
235 if (res < sizeof(sync))
236 printf(" => truncated data?!");
237 else if (!memcmp(sync, data + res - sizeof(sync),
238 sizeof(sync)))
239 printf(" => GOT OUR DATA BACK (HURRAY!)");
240 break;
241 }
242 case IP_PKTINFO: {
243 struct in_pktinfo *pktinfo =
244 (struct in_pktinfo *)CMSG_DATA(cmsg);
245 printf("IP_PKTINFO interface index %u",
246 pktinfo->ipi_ifindex);
247 break;
248 }
249 default:
250 printf("type %d", cmsg->cmsg_type);
251 break;
252 }
253 break;
254 default:
255 printf("level %d type %d",
256 cmsg->cmsg_level,
257 cmsg->cmsg_type);
258 break;
259 }
260 printf("\n");
261 }
262
263 if (siocgstamp) {
264 if (ioctl(sock, SIOCGSTAMP, &tv))
265 printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno));
266 else
267 printf("SIOCGSTAMP %ld.%06ld\n",
268 (long)tv.tv_sec,
269 (long)tv.tv_usec);
270 }
271 if (siocgstampns) {
272 if (ioctl(sock, SIOCGSTAMPNS, &ts))
273 printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
274 else
275 printf("SIOCGSTAMPNS %ld.%09ld\n",
276 (long)ts.tv_sec,
277 (long)ts.tv_nsec);
278 }
279}
280
281static void recvpacket(int sock, int recvmsg_flags,
282 int siocgstamp, int siocgstampns)
283{
284 char data[256];
285 struct msghdr msg;
286 struct iovec entry;
287 struct sockaddr_in from_addr;
288 struct {
289 struct cmsghdr cm;
290 char control[512];
291 } control;
292 int res;
293
294 memset(&msg, 0, sizeof(msg));
295 msg.msg_iov = &entry;
296 msg.msg_iovlen = 1;
297 entry.iov_base = data;
298 entry.iov_len = sizeof(data);
299 msg.msg_name = (caddr_t)&from_addr;
300 msg.msg_namelen = sizeof(from_addr);
301 msg.msg_control = &control;
302 msg.msg_controllen = sizeof(control);
303
304 res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
305 if (res < 0) {
306 printf("%s %s: %s\n",
307 "recvmsg",
308 (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
309 strerror(errno));
310 } else {
311 printpacket(&msg, res, data,
312 sock, recvmsg_flags,
313 siocgstamp, siocgstampns);
314 }
315}
316
317int main(int argc, char **argv)
318{
319 int so_timestamping_flags = 0;
320 int so_timestamp = 0;
321 int so_timestampns = 0;
322 int siocgstamp = 0;
323 int siocgstampns = 0;
324 int ip_multicast_loop = 0;
325 char *interface;
326 int i;
327 int enabled = 1;
328 int sock;
329 struct ifreq device;
330 struct ifreq hwtstamp;
331 struct hwtstamp_config hwconfig, hwconfig_requested;
332 struct sockaddr_in addr;
333 struct ip_mreq imr;
334 struct in_addr iaddr;
335 int val;
336 socklen_t len;
337 struct timeval next;
338
339 if (argc < 2)
340 usage(0);
341 interface = argv[1];
342
343 for (i = 2; i < argc; i++) {
344 if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
345 so_timestamp = 1;
346 else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
347 so_timestampns = 1;
348 else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
349 siocgstamp = 1;
350 else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
351 siocgstampns = 1;
352 else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
353 ip_multicast_loop = 1;
354 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
355 so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
356 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
357 so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
358 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
359 so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
360 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
361 so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
362 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
363 so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
364 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE"))
365 so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE;
366 else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
367 so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
368 else
369 usage(argv[i]);
370 }
371
372 sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
373 if (socket < 0)
374 bail("socket");
375
376 memset(&device, 0, sizeof(device));
377 strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
378 if (ioctl(sock, SIOCGIFADDR, &device) < 0)
379 bail("getting interface IP address");
380
381 memset(&hwtstamp, 0, sizeof(hwtstamp));
382 strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
383 hwtstamp.ifr_data = (void *)&hwconfig;
384 memset(&hwconfig, 0, sizeof(&hwconfig));
385 hwconfig.tx_type =
386 (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
387 HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
388 hwconfig.rx_filter =
389 (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
390 HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
391 hwconfig_requested = hwconfig;
392 if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
393 if ((errno == EINVAL || errno == ENOTSUP) &&
394 hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
395 hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
396 printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
397 else
398 bail("SIOCSHWTSTAMP");
399 }
400 printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
401 hwconfig_requested.tx_type, hwconfig.tx_type,
402 hwconfig_requested.rx_filter, hwconfig.rx_filter);
403
404 /* bind to PTP port */
405 addr.sin_family = AF_INET;
406 addr.sin_addr.s_addr = htonl(INADDR_ANY);
407 addr.sin_port = htons(319 /* PTP event port */);
408 if (bind(sock,
409 (struct sockaddr *)&addr,
410 sizeof(struct sockaddr_in)) < 0)
411 bail("bind");
412
413 /* set multicast group for outgoing packets */
414 inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
415 addr.sin_addr = iaddr;
416 imr.imr_multiaddr.s_addr = iaddr.s_addr;
417 imr.imr_interface.s_addr =
418 ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
419 if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
420 &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
421 bail("set multicast");
422
423 /* join multicast group, loop our own packet */
424 if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
425 &imr, sizeof(struct ip_mreq)) < 0)
426 bail("join multicast group");
427
428 if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
429 &ip_multicast_loop, sizeof(enabled)) < 0) {
430 bail("loop multicast");
431 }
432
433 /* set socket options for time stamping */
434 if (so_timestamp &&
435 setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
436 &enabled, sizeof(enabled)) < 0)
437 bail("setsockopt SO_TIMESTAMP");
438
439 if (so_timestampns &&
440 setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
441 &enabled, sizeof(enabled)) < 0)
442 bail("setsockopt SO_TIMESTAMPNS");
443
444 if (so_timestamping_flags &&
445 setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
446 &so_timestamping_flags,
447 sizeof(so_timestamping_flags)) < 0)
448 bail("setsockopt SO_TIMESTAMPING");
449
450 /* request IP_PKTINFO for debugging purposes */
451 if (setsockopt(sock, SOL_IP, IP_PKTINFO,
452 &enabled, sizeof(enabled)) < 0)
453 printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
454
455 /* verify socket options */
456 len = sizeof(val);
457 if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
458 printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
459 else
460 printf("SO_TIMESTAMP %d\n", val);
461
462 if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
463 printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
464 strerror(errno));
465 else
466 printf("SO_TIMESTAMPNS %d\n", val);
467
468 if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
469 printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
470 strerror(errno));
471 } else {
472 printf("SO_TIMESTAMPING %d\n", val);
473 if (val != so_timestamping_flags)
474 printf(" not the expected value %d\n",
475 so_timestamping_flags);
476 }
477
478 /* send packets forever every five seconds */
479 gettimeofday(&next, 0);
480 next.tv_sec = (next.tv_sec + 1) / 5 * 5;
481 next.tv_usec = 0;
482 while (1) {
483 struct timeval now;
484 struct timeval delta;
485 long delta_us;
486 int res;
487 fd_set readfs, errorfs;
488
489 gettimeofday(&now, 0);
490 delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
491 (long)(next.tv_usec - now.tv_usec);
492 if (delta_us > 0) {
493 /* continue waiting for timeout or data */
494 delta.tv_sec = delta_us / 1000000;
495 delta.tv_usec = delta_us % 1000000;
496
497 FD_ZERO(&readfs);
498 FD_ZERO(&errorfs);
499 FD_SET(sock, &readfs);
500 FD_SET(sock, &errorfs);
501 printf("%ld.%06ld: select %ldus\n",
502 (long)now.tv_sec, (long)now.tv_usec,
503 delta_us);
504 res = select(sock + 1, &readfs, 0, &errorfs, &delta);
505 gettimeofday(&now, 0);
506 printf("%ld.%06ld: select returned: %d, %s\n",
507 (long)now.tv_sec, (long)now.tv_usec,
508 res,
509 res < 0 ? strerror(errno) : "success");
510 if (res > 0) {
511 if (FD_ISSET(sock, &readfs))
512 printf("ready for reading\n");
513 if (FD_ISSET(sock, &errorfs))
514 printf("has error\n");
515 recvpacket(sock, 0,
516 siocgstamp,
517 siocgstampns);
518 recvpacket(sock, MSG_ERRQUEUE,
519 siocgstamp,
520 siocgstampns);
521 }
522 } else {
523 /* write one packet */
524 sendpacket(sock,
525 (struct sockaddr *)&addr,
526 sizeof(addr));
527 next.tv_sec += 5;
528 continue;
529 }
530 }
531
532 return 0;
533}
diff --git a/Documentation/networking/vxge.txt b/Documentation/networking/vxge.txt
new file mode 100644
index 000000000000..d2e2997e6fa0
--- /dev/null
+++ b/Documentation/networking/vxge.txt
@@ -0,0 +1,100 @@
1Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
2==============================================================================
3
4Contents
5--------
6
71) Introduction
82) Features supported
93) Configurable driver parameters
104) Troubleshooting
11
121) Introduction:
13----------------
14This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
15Virtualized Server adapters.
16The X3100 series supports four modes of operation, configurable via
17firmware -
18 Single function mode
19 Multi function mode
20 SRIOV mode
21 MRIOV mode
22The functions share a 10GbE link and the pci-e bus, but hardly anything else
23inside the ASIC. Features like independent hw reset, statistics, bandwidth/
24priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
25supported independently on each function.
26
27(See below for a complete list of features supported for both IPv4 and IPv6)
28
292) Features supported:
30----------------------
31
32i) Single function mode (up to 17 queues)
33
34ii) Multi function mode (up to 17 functions)
35
36iii) PCI-SIG's I/O Virtualization
37 - Single Root mode: v1.0 (up to 17 functions)
38 - Multi-Root mode: v1.0 (up to 17 functions)
39
40iv) Jumbo frames
41 X3100 Series supports MTU up to 9600 bytes, modifiable using
42 ifconfig command.
43
44v) Offloads supported: (Enabled by default)
45 Checksum offload (TCP/UDP/IP) on transmit and receive paths
46 TCP Segmentation Offload (TSO) on transmit path
47 Generic Receive Offload (GRO) on receive path
48
49vi) MSI-X: (Enabled by default)
50 Resulting in noticeable performance improvement (up to 7% on certain
51 platforms).
52
53vii) NAPI: (Enabled by default)
54 For better Rx interrupt moderation.
55
56viii)RTH (Receive Traffic Hash): (Enabled by default)
57 Receive side steering for better scaling.
58
59ix) Statistics
60 Comprehensive MAC-level and software statistics displayed using
61 "ethtool -S" option.
62
63x) Multiple hardware queues: (Enabled by default)
64 Up to 17 hardware based transmit and receive data channels, with
65 multiple steering options (transmit multiqueue enabled by default).
66
673) Configurable driver parameters:
68----------------------------------
69
70i) max_config_dev
71 Specifies maximum device functions to be enabled.
72 Valid range: 1-8
73
74ii) max_config_port
75 Specifies number of ports to be enabled.
76 Valid range: 1,2
77 Default: 1
78
79iii)max_config_vpath
80 Specifies maximum VPATH(s) configured for each device function.
81 Valid range: 1-17
82
83iv) vlan_tag_strip
84 Enables/disables vlan tag stripping from all received tagged frames that
85 are not replicated at the internal L2 switch.
86 Valid range: 0,1 (disabled, enabled respectively)
87 Default: 1
88
89v) addr_learn_en
90 Enable learning the mac address of the guest OS interface in
91 virtualization environment.
92 Valid range: 0,1 (disabled, enabled respectively)
93 Default: 0
94
954) Troubleshooting:
96-------------------
97
98To resolve an issue with the source code or X3100 series adapter, please collect
99the statistics, register dumps using ethool, relevant logs and email them to
100support@neterion.com.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 421e7d00ffd0..c9abbd86bc18 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices:
75struct bus_type { 75struct bus_type {
76 ... 76 ...
77 int (*suspend)(struct device *dev, pm_message_t state); 77 int (*suspend)(struct device *dev, pm_message_t state);
78 int (*suspend_late)(struct device *dev, pm_message_t state);
79
80 int (*resume_early)(struct device *dev);
81 int (*resume)(struct device *dev); 78 int (*resume)(struct device *dev);
82}; 79};
83 80
@@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order:
226 223
227 This call should handle parts of device suspend logic that require 224 This call should handle parts of device suspend logic that require
228 sleeping. It probably does work to quiesce the device which hasn't 225 sleeping. It probably does work to quiesce the device which hasn't
229 been abstracted into class.suspend() or bus.suspend_late(). 226 been abstracted into class.suspend().
230
231 3 bus.suspend_late(dev, message) is called with IRQs disabled, and
232 with only one CPU active. Until the bus.resume_early() phase
233 completes (see later), IRQs are not enabled again. This method
234 won't be exposed by all busses; for message based busses like USB,
235 I2C, or SPI, device interactions normally require IRQs. This bus
236 call may be morphed into a driver call with bus-specific parameters.
237
238 This call might save low level hardware state that might otherwise
239 be lost in the upcoming low power state, and actually put the
240 device into a low power state ... so that in some cases the device
241 may stay partly usable until this late. This "late" call may also
242 help when coping with hardware that behaves badly.
243 227
244The pm_message_t parameter is currently used to refine those semantics 228The pm_message_t parameter is currently used to refine those semantics
245(described later). 229(described later).
@@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins.
351 335
352The phases are seen by driver notifications issued in this order: 336The phases are seen by driver notifications issued in this order:
353 337
354 1 bus.resume_early(dev) is called with IRQs disabled, and with 338 1 bus.resume(dev) reverses the effects of bus.suspend(). This may
355 only one CPU active. As with bus.suspend_late(), this method 339 be morphed into a device driver call with bus-specific parameters;
356 won't be supported on busses that require IRQs in order to 340 implementations may sleep.
357 interact with devices.
358
359 This reverses the effects of bus.suspend_late().
360
361 2 bus.resume(dev) is called next. This may be morphed into a device
362 driver call with bus-specific parameters; implementations may sleep.
363
364 This reverses the effects of bus.suspend().
365 341
366 3 class.resume(dev) is called for devices associated with a class 342 2 class.resume(dev) is called for devices associated with a class
367 that has such a method. Implementations may sleep. 343 that has such a method. Implementations may sleep.
368 344
369 This reverses the effects of class.suspend(), and would usually 345 This reverses the effects of class.suspend(), and would usually
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
index 82b7a43aadba..5f83fd24ea84 100644
--- a/Documentation/power/regulator/consumer.txt
+++ b/Documentation/power/regulator/consumer.txt
@@ -178,5 +178,5 @@ Consumers can uregister interest by calling :-
178int regulator_unregister_notifier(struct regulator *regulator, 178int regulator_unregister_notifier(struct regulator *regulator,
179 struct notifier_block *nb); 179 struct notifier_block *nb);
180 180
181Regulators use the kernel notifier framework to send event to thier interested 181Regulators use the kernel notifier framework to send event to their interested
182consumers. 182consumers.
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
index bdcb332bd7fb..0cded696ca01 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.txt
@@ -119,7 +119,7 @@ Some terms used in this document:-
119 battery power, USB power) 119 battery power, USB power)
120 120
121 Regulator Domains: is the new current limit within the 121 Regulator Domains: is the new current limit within the
122 regulator operating parameters for input/ouput voltage. 122 regulator operating parameters for input/output voltage.
123 123
124 If the regulator request passes all the constraint tests 124 If the regulator request passes all the constraint tests
125 then the new regulator value is applied. 125 then the new regulator value is applied.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
index 2ebdc6091ce1..514b94fc931e 100644
--- a/Documentation/power/s2ram.txt
+++ b/Documentation/power/s2ram.txt
@@ -63,7 +63,7 @@ hardware during resume operations where a value can be set that will
63survive a reboot. 63survive a reboot.
64 64
65Consequence is that after a resume (even if it is successful) your system 65Consequence is that after a resume (even if it is successful) your system
66clock will have a value corresponding to the magic mumber instead of the 66clock will have a value corresponding to the magic number instead of the
67correct date/time! It is therefore advisable to use a program like ntp-date 67correct date/time! It is therefore advisable to use a program like ntp-date
68or rdate to reset the correct date/time from an external time source when 68or rdate to reset the correct date/time from an external time source when
69using this trace option. 69using this trace option.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 7b99636564c8..b967cd9137d6 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -109,7 +109,7 @@ unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
109still frozen when the device is being closed). 109still frozen when the device is being closed).
110 110
111Currently it is assumed that the userland utilities reading/writing the 111Currently it is assumed that the userland utilities reading/writing the
112snapshot image from/to the kernel will use a swap parition, called the resume 112snapshot image from/to the kernel will use a swap partition, called the resume
113partition, or a swap file as storage space (if a swap file is used, the resume 113partition, or a swap file as storage space (if a swap file is used, the resume
114partition is the partition that holds this file). However, this is not really 114partition is the partition that holds this file). However, this is not really
115required, as they can use, for example, a special (blank) suspend partition or 115required, as they can use, for example, a special (blank) suspend partition or
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 0ab0230cbcb0..8d999d862d0e 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -43,12 +43,11 @@ Table of Contents
43 2) Representing devices without a current OF specification 43 2) Representing devices without a current OF specification
44 a) PHY nodes 44 a) PHY nodes
45 b) Interrupt controllers 45 b) Interrupt controllers
46 c) CFI or JEDEC memory-mapped NOR flash 46 c) 4xx/Axon EMAC ethernet nodes
47 d) 4xx/Axon EMAC ethernet nodes 47 d) Xilinx IP cores
48 e) Xilinx IP cores 48 e) USB EHCI controllers
49 f) USB EHCI controllers 49 f) MDIO on GPIOs
50 g) MDIO on GPIOs 50 g) SPI busses
51 h) SPI busses
52 51
53 VII - Marvell Discovery mv64[345]6x System Controller chips 52 VII - Marvell Discovery mv64[345]6x System Controller chips
54 1) The /system-controller node 53 1) The /system-controller node
@@ -999,7 +998,7 @@ compatibility.
999 translation of SOC addresses for memory mapped SOC registers. 998 translation of SOC addresses for memory mapped SOC registers.
1000 - bus-frequency: Contains the bus frequency for the SOC node. 999 - bus-frequency: Contains the bus frequency for the SOC node.
1001 Typically, the value of this field is filled in by the boot 1000 Typically, the value of this field is filled in by the boot
1002 loader. 1001 loader.
1003 1002
1004 1003
1005 Recommended properties: 1004 Recommended properties:
@@ -1287,71 +1286,7 @@ platforms are moved over to use the flattened-device-tree model.
1287 device_type = "open-pic"; 1286 device_type = "open-pic";
1288 }; 1287 };
1289 1288
1290 c) CFI or JEDEC memory-mapped NOR flash 1289 c) 4xx/Axon EMAC ethernet nodes
1291
1292 Flash chips (Memory Technology Devices) are often used for solid state
1293 file systems on embedded devices.
1294
1295 - compatible : should contain the specific model of flash chip(s)
1296 used, if known, followed by either "cfi-flash" or "jedec-flash"
1297 - reg : Address range of the flash chip
1298 - bank-width : Width (in bytes) of the flash bank. Equal to the
1299 device width times the number of interleaved chips.
1300 - device-width : (optional) Width of a single flash chip. If
1301 omitted, assumed to be equal to 'bank-width'.
1302 - #address-cells, #size-cells : Must be present if the flash has
1303 sub-nodes representing partitions (see below). In this case
1304 both #address-cells and #size-cells must be equal to 1.
1305
1306 For JEDEC compatible devices, the following additional properties
1307 are defined:
1308
1309 - vendor-id : Contains the flash chip's vendor id (1 byte).
1310 - device-id : Contains the flash chip's device id (1 byte).
1311
1312 In addition to the information on the flash bank itself, the
1313 device tree may optionally contain additional information
1314 describing partitions of the flash address space. This can be
1315 used on platforms which have strong conventions about which
1316 portions of the flash are used for what purposes, but which don't
1317 use an on-flash partition table such as RedBoot.
1318
1319 Each partition is represented as a sub-node of the flash device.
1320 Each node's name represents the name of the corresponding
1321 partition of the flash device.
1322
1323 Flash partitions
1324 - reg : The partition's offset and size within the flash bank.
1325 - label : (optional) The label / name for this flash partition.
1326 If omitted, the label is taken from the node name (excluding
1327 the unit address).
1328 - read-only : (optional) This parameter, if present, is a hint to
1329 Linux that this flash partition should only be mounted
1330 read-only. This is usually used for flash partitions
1331 containing early-boot firmware images or data which should not
1332 be clobbered.
1333
1334 Example:
1335
1336 flash@ff000000 {
1337 compatible = "amd,am29lv128ml", "cfi-flash";
1338 reg = <ff000000 01000000>;
1339 bank-width = <4>;
1340 device-width = <1>;
1341 #address-cells = <1>;
1342 #size-cells = <1>;
1343 fs@0 {
1344 label = "fs";
1345 reg = <0 f80000>;
1346 };
1347 firmware@f80000 {
1348 label ="firmware";
1349 reg = <f80000 80000>;
1350 read-only;
1351 };
1352 };
1353
1354 d) 4xx/Axon EMAC ethernet nodes
1355 1290
1356 The EMAC ethernet controller in IBM and AMCC 4xx chips, and also 1291 The EMAC ethernet controller in IBM and AMCC 4xx chips, and also
1357 the Axon bridge. To operate this needs to interact with a ths 1292 the Axon bridge. To operate this needs to interact with a ths
@@ -1421,7 +1356,7 @@ platforms are moved over to use the flattened-device-tree model.
1421 - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY 1356 - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
1422 for, used if phy-address is absent. bit 0x00000001 is 1357 for, used if phy-address is absent. bit 0x00000001 is
1423 MDIO address 0. 1358 MDIO address 0.
1424 For Axon it can be absent, thouugh my current driver 1359 For Axon it can be absent, though my current driver
1425 doesn't handle phy-address yet so for now, keep 1360 doesn't handle phy-address yet so for now, keep
1426 0x00ffffff in it. 1361 0x00ffffff in it.
1427 - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec 1362 - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
@@ -1499,11 +1434,11 @@ platforms are moved over to use the flattened-device-tree model.
1499 available. 1434 available.
1500 For Axon: 0x0000012a 1435 For Axon: 0x0000012a
1501 1436
1502 e) Xilinx IP cores 1437 d) Xilinx IP cores
1503 1438
1504 The Xilinx EDK toolchain ships with a set of IP cores (devices) for use 1439 The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
1505 in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range 1440 in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range
1506 of standard device types (network, serial, etc.) and miscellanious 1441 of standard device types (network, serial, etc.) and miscellaneous
1507 devices (gpio, LCD, spi, etc). Also, since these devices are 1442 devices (gpio, LCD, spi, etc). Also, since these devices are
1508 implemented within the fpga fabric every instance of the device can be 1443 implemented within the fpga fabric every instance of the device can be
1509 synthesised with different options that change the behaviour. 1444 synthesised with different options that change the behaviour.
@@ -1761,7 +1696,7 @@ platforms are moved over to use the flattened-device-tree model.
1761 listed above, nodes for these devices should include a phy-handle 1696 listed above, nodes for these devices should include a phy-handle
1762 property, and may include other common network device properties 1697 property, and may include other common network device properties
1763 like local-mac-address. 1698 like local-mac-address.
1764 1699
1765 iv) Xilinx Uartlite 1700 iv) Xilinx Uartlite
1766 1701
1767 Xilinx uartlite devices are simple fixed speed serial ports. 1702 Xilinx uartlite devices are simple fixed speed serial ports.
@@ -1793,7 +1728,7 @@ platforms are moved over to use the flattened-device-tree model.
1793 - reg-offset : A value of 3 is required 1728 - reg-offset : A value of 3 is required
1794 - reg-shift : A value of 2 is required 1729 - reg-shift : A value of 2 is required
1795 1730
1796 f) USB EHCI controllers 1731 e) USB EHCI controllers
1797 1732
1798 Required properties: 1733 Required properties:
1799 - compatible : should be "usb-ehci". 1734 - compatible : should be "usb-ehci".
@@ -1819,7 +1754,7 @@ platforms are moved over to use the flattened-device-tree model.
1819 big-endian; 1754 big-endian;
1820 }; 1755 };
1821 1756
1822 g) MDIO on GPIOs 1757 f) MDIO on GPIOs
1823 1758
1824 Currently defined compatibles: 1759 Currently defined compatibles:
1825 - virtual,gpio-mdio 1760 - virtual,gpio-mdio
@@ -1839,7 +1774,7 @@ platforms are moved over to use the flattened-device-tree model.
1839 &qe_pio_c 6>; 1774 &qe_pio_c 6>;
1840 }; 1775 };
1841 1776
1842 h) SPI (Serial Peripheral Interface) busses 1777 g) SPI (Serial Peripheral Interface) busses
1843 1778
1844 SPI busses can be described with a node for the SPI master device 1779 SPI busses can be described with a node for the SPI master device
1845 and a set of child nodes for each SPI slave on the bus. For this 1780 and a set of child nodes for each SPI slave on the bus. For this
diff --git a/Documentation/powerpc/dts-bindings/can/sja1000.txt b/Documentation/powerpc/dts-bindings/can/sja1000.txt
new file mode 100644
index 000000000000..d6d209ded937
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/can/sja1000.txt
@@ -0,0 +1,53 @@
1Memory mapped SJA1000 CAN controller from NXP (formerly Philips)
2
3Required properties:
4
5- compatible : should be "nxp,sja1000".
6
7- reg : should specify the chip select, address offset and size required
8 to map the registers of the SJA1000. The size is usually 0x80.
9
10- interrupts: property with a value describing the interrupt source
11 (number and sensitivity) required for the SJA1000.
12
13Optional properties:
14
15- nxp,external-clock-frequency : Frequency of the external oscillator
16 clock in Hz. Note that the internal clock frequency used by the
17 SJA1000 is half of that value. If not specified, a default value
18 of 16000000 (16 MHz) is used.
19
20- nxp,tx-output-mode : operation mode of the TX output control logic:
21 <0x0> : bi-phase output mode
22 <0x1> : normal output mode (default)
23 <0x2> : test output mode
24 <0x3> : clock output mode
25
26- nxp,tx-output-config : TX output pin configuration:
27 <0x01> : TX0 invert
28 <0x02> : TX0 pull-down (default)
29 <0x04> : TX0 pull-up
30 <0x06> : TX0 push-pull
31 <0x08> : TX1 invert
32 <0x10> : TX1 pull-down
33 <0x20> : TX1 pull-up
34 <0x30> : TX1 push-pull
35
36- nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin.
37 If not specified or if the specified value is 0, the CLKOUT pin
38 will be disabled.
39
40- nxp,no-comparator-bypass : Allows to disable the CAN input comperator.
41
42For futher information, please have a look to the SJA1000 data sheet.
43
44Examples:
45
46can@3,100 {
47 compatible = "nxp,sja1000";
48 reg = <3 0x100 0x80>;
49 interrupts = <2 0>;
50 interrupt-parent = <&mpic>;
51 nxp,external-clock-frequency = <16000000>;
52};
53
diff --git a/Documentation/powerpc/dts-bindings/ecm.txt b/Documentation/powerpc/dts-bindings/ecm.txt
new file mode 100644
index 000000000000..f514f29c67d6
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/ecm.txt
@@ -0,0 +1,64 @@
1=====================================================================
2E500 LAW & Coherency Module Device Tree Binding
3Copyright (C) 2009 Freescale Semiconductor Inc.
4=====================================================================
5
6Local Access Window (LAW) Node
7
8The LAW node represents the region of CCSR space where local access
9windows are configured. For ECM based devices this is the first 4k
10of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
11number of local access windows as specified by fsl,num-laws.
12
13PROPERTIES
14
15 - compatible
16 Usage: required
17 Value type: <string>
18 Definition: Must include "fsl,ecm-law"
19
20 - reg
21 Usage: required
22 Value type: <prop-encoded-array>
23 Definition: A standard property. The value specifies the
24 physical address offset and length of the CCSR space
25 registers.
26
27 - fsl,num-laws
28 Usage: required
29 Value type: <u32>
30 Definition: The value specifies the number of local access
31 windows for this device.
32
33=====================================================================
34
35E500 Coherency Module Node
36
37The E500 LAW node represents the region of CCSR space where ECM config
38and error reporting registers exist, this is the second 4k (0x1000)
39of CCSR space.
40
41PROPERTIES
42
43 - compatible
44 Usage: required
45 Value type: <string>
46 Definition: Must include "fsl,CHIP-ecm", "fsl,ecm" where
47 CHIP is the processor (mpc8572, mpc8544, etc.)
48
49 - reg
50 Usage: required
51 Value type: <prop-encoded-array>
52 Definition: A standard property. The value specifies the
53 physical address offset and length of the CCSR space
54 registers.
55
56 - interrupts
57 Usage: required
58 Value type: <prop-encoded-array>
59
60 - interrupt-parent
61 Usage: required
62 Value type: <phandle>
63
64=====================================================================
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt
index 6c974d28eeb4..e8b5bc24d0ac 100644
--- a/Documentation/powerpc/dts-bindings/fsl/board.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/board.txt
@@ -38,7 +38,7 @@ Required properities:
38- reg : Should contain the address and the length of the GPIO bank 38- reg : Should contain the address and the length of the GPIO bank
39 register. 39 register.
40- #gpio-cells : Should be two. The first cell is the pin number and the 40- #gpio-cells : Should be two. The first cell is the pin number and the
41 second cell is used to specify optional paramters (currently unused). 41 second cell is used to specify optional parameters (currently unused).
42- gpio-controller : Marks the port as GPIO controller. 42- gpio-controller : Marks the port as GPIO controller.
43 43
44Example: 44Example:
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
index 088fc471e03a..160c752484b4 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
@@ -19,7 +19,7 @@ Example:
19 reg = <119c0 30>; 19 reg = <119c0 30>;
20 } 20 }
21 21
22* Properties common to mulitple CPM/QE devices 22* Properties common to multiple CPM/QE devices
23 23
24- fsl,cpm-command : This value is ORed with the opcode and command flag 24- fsl,cpm-command : This value is ORed with the opcode and command flag
25 to specify the device on which a CPM command operates. 25 to specify the device on which a CPM command operates.
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
index 1815dfede1bc..349f79fd7076 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
@@ -11,7 +11,7 @@ Required properties:
11 "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", 11 "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d",
12 "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" 12 "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank"
13- #gpio-cells : Should be two. The first cell is the pin number and the 13- #gpio-cells : Should be two. The first cell is the pin number and the
14 second cell is used to specify optional paramters (currently unused). 14 second cell is used to specify optional parameters (currently unused).
15- gpio-controller : Marks the port as GPIO controller. 15- gpio-controller : Marks the port as GPIO controller.
16 16
17Example of three SOC GPIO banks defined as gpio-controller nodes: 17Example of three SOC GPIO banks defined as gpio-controller nodes:
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
index 78790d58dc2c..6e37be1eeb2d 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
@@ -17,6 +17,9 @@ Required properties:
17- model : precise model of the QE, Can be "QE", "CPM", or "CPM2" 17- model : precise model of the QE, Can be "QE", "CPM", or "CPM2"
18- reg : offset and length of the device registers. 18- reg : offset and length of the device registers.
19- bus-frequency : the clock frequency for QUICC Engine. 19- bus-frequency : the clock frequency for QUICC Engine.
20- fsl,qe-num-riscs: define how many RISC engines the QE has.
21- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
22 threads.
20 23
21Recommended properties 24Recommended properties
22- brg-frequency : the internal clock source frequency for baud-rate 25- brg-frequency : the internal clock source frequency for baud-rate
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt
index 6c238f59b2a9..249db3a15d15 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe/firmware.txt
@@ -1,6 +1,6 @@
1* Uploaded QE firmware 1* Uploaded QE firmware
2 2
3 If a new firwmare has been uploaded to the QE (usually by the 3 If a new firmware has been uploaded to the QE (usually by the
4 boot loader), then a 'firmware' child node should be added to the QE 4 boot loader), then a 'firmware' child node should be added to the QE
5 node. This node provides information on the uploaded firmware that 5 node. This node provides information on the uploaded firmware that
6 device drivers may need. 6 device drivers may need.
diff --git a/Documentation/powerpc/dts-bindings/fsl/dma.txt b/Documentation/powerpc/dts-bindings/fsl/dma.txt
index cc453110fc46..0732cdd05ba1 100644
--- a/Documentation/powerpc/dts-bindings/fsl/dma.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/dma.txt
@@ -35,30 +35,30 @@ Example:
35 #address-cells = <1>; 35 #address-cells = <1>;
36 #size-cells = <1>; 36 #size-cells = <1>;
37 compatible = "fsl,mpc8349-dma", "fsl,elo-dma"; 37 compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
38 reg = <82a8 4>; 38 reg = <0x82a8 4>;
39 ranges = <0 8100 1a4>; 39 ranges = <0 0x8100 0x1a4>;
40 interrupt-parent = <&ipic>; 40 interrupt-parent = <&ipic>;
41 interrupts = <47 8>; 41 interrupts = <71 8>;
42 cell-index = <0>; 42 cell-index = <0>;
43 dma-channel@0 { 43 dma-channel@0 {
44 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; 44 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
45 cell-index = <0>; 45 cell-index = <0>;
46 reg = <0 80>; 46 reg = <0 0x80>;
47 }; 47 };
48 dma-channel@80 { 48 dma-channel@80 {
49 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; 49 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
50 cell-index = <1>; 50 cell-index = <1>;
51 reg = <80 80>; 51 reg = <0x80 0x80>;
52 }; 52 };
53 dma-channel@100 { 53 dma-channel@100 {
54 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; 54 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
55 cell-index = <2>; 55 cell-index = <2>;
56 reg = <100 80>; 56 reg = <0x100 0x80>;
57 }; 57 };
58 dma-channel@180 { 58 dma-channel@180 {
59 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel"; 59 compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
60 cell-index = <3>; 60 cell-index = <3>;
61 reg = <180 80>; 61 reg = <0x180 0x80>;
62 }; 62 };
63 }; 63 };
64 64
@@ -93,36 +93,36 @@ Example:
93 #address-cells = <1>; 93 #address-cells = <1>;
94 #size-cells = <1>; 94 #size-cells = <1>;
95 compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma"; 95 compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma";
96 reg = <21300 4>; 96 reg = <0x21300 4>;
97 ranges = <0 21100 200>; 97 ranges = <0 0x21100 0x200>;
98 cell-index = <0>; 98 cell-index = <0>;
99 dma-channel@0 { 99 dma-channel@0 {
100 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; 100 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
101 reg = <0 80>; 101 reg = <0 0x80>;
102 cell-index = <0>; 102 cell-index = <0>;
103 interrupt-parent = <&mpic>; 103 interrupt-parent = <&mpic>;
104 interrupts = <14 2>; 104 interrupts = <20 2>;
105 }; 105 };
106 dma-channel@80 { 106 dma-channel@80 {
107 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; 107 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
108 reg = <80 80>; 108 reg = <0x80 0x80>;
109 cell-index = <1>; 109 cell-index = <1>;
110 interrupt-parent = <&mpic>; 110 interrupt-parent = <&mpic>;
111 interrupts = <15 2>; 111 interrupts = <21 2>;
112 }; 112 };
113 dma-channel@100 { 113 dma-channel@100 {
114 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; 114 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
115 reg = <100 80>; 115 reg = <0x100 0x80>;
116 cell-index = <2>; 116 cell-index = <2>;
117 interrupt-parent = <&mpic>; 117 interrupt-parent = <&mpic>;
118 interrupts = <16 2>; 118 interrupts = <22 2>;
119 }; 119 };
120 dma-channel@180 { 120 dma-channel@180 {
121 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel"; 121 compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
122 reg = <180 80>; 122 reg = <0x180 0x80>;
123 cell-index = <3>; 123 cell-index = <3>;
124 interrupt-parent = <&mpic>; 124 interrupt-parent = <&mpic>;
125 interrupts = <17 2>; 125 interrupts = <23 2>;
126 }; 126 };
127 }; 127 };
128 128
diff --git a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
new file mode 100644
index 000000000000..3ed3797b5086
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
@@ -0,0 +1,25 @@
1* Freescale Enhanced Secure Digital Host Controller (eSDHC)
2
3The Enhanced Secure Digital Host Controller provides an interface
4for MMC, SD, and SDIO types of memory cards.
5
6Required properties:
7 - compatible : should be
8 "fsl,<chip>-esdhc", "fsl,esdhc"
9 - reg : should contain eSDHC registers location and length.
10 - interrupts : should contain eSDHC interrupt.
11 - interrupt-parent : interrupt source phandle.
12 - clock-frequency : specifies eSDHC base clock frequency.
13 - sdhci,1-bit-only : (optional) specifies that a controller can
14 only handle 1-bit data transfers.
15
16Example:
17
18sdhci@2e000 {
19 compatible = "fsl,mpc8378-esdhc", "fsl,esdhc";
20 reg = <0x2e000 0x1000>;
21 interrupts = <42 0x8>;
22 interrupt-parent = <&ipic>;
23 /* Filled in by U-Boot */
24 clock-frequency = <0>;
25};
diff --git a/Documentation/powerpc/dts-bindings/fsl/i2c.txt b/Documentation/powerpc/dts-bindings/fsl/i2c.txt
index d0ab33e21fe6..b6d2e21474f9 100644
--- a/Documentation/powerpc/dts-bindings/fsl/i2c.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/i2c.txt
@@ -7,8 +7,10 @@ Required properties :
7 7
8Recommended properties : 8Recommended properties :
9 9
10 - compatible : Should be "fsl-i2c" for parts compatible with 10 - compatible : compatibility list with 2 entries, the first should
11 Freescale I2C specifications. 11 be "fsl,CHIP-i2c" where CHIP is the name of a compatible processor,
12 e.g. mpc8313, mpc8543, mpc8544, mpc5200 or mpc5200b. The second one
13 should be "fsl-i2c".
12 - interrupts : <a b> where a is the interrupt number and b is a 14 - interrupts : <a b> where a is the interrupt number and b is a
13 field that represents an encoding of the sense and level 15 field that represents an encoding of the sense and level
14 information for the interrupt. This should be encoded based on 16 information for the interrupt. This should be encoded based on
@@ -16,17 +18,31 @@ Recommended properties :
16 controller you have. 18 controller you have.
17 - interrupt-parent : the phandle for the interrupt controller that 19 - interrupt-parent : the phandle for the interrupt controller that
18 services interrupts for this device. 20 services interrupts for this device.
19 - dfsrr : boolean; if defined, indicates that this I2C device has 21 - fsl,preserve-clocking : boolean; if defined, the clock settings
20 a digital filter sampling rate register 22 from the bootloader are preserved (not touched).
21 - fsl5200-clocking : boolean; if defined, indicated that this device 23 - clock-frequency : desired I2C bus clock frequency in Hz.
22 uses the FSL 5200 clocking mechanism. 24
23 25Examples :
24Example : 26
25 i2c@3000 { 27 i2c@3d00 {
26 interrupt-parent = <40000>; 28 #address-cells = <1>;
27 interrupts = <1b 3>; 29 #size-cells = <0>;
28 reg = <3000 18>; 30 compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
29 device_type = "i2c"; 31 cell-index = <0>;
30 compatible = "fsl-i2c"; 32 reg = <0x3d00 0x40>;
31 dfsrr; 33 interrupts = <2 15 0>;
34 interrupt-parent = <&mpc5200_pic>;
35 fsl,preserve-clocking;
32 }; 36 };
37
38 i2c@3100 {
39 #address-cells = <1>;
40 #size-cells = <0>;
41 cell-index = <1>;
42 compatible = "fsl,mpc8544-i2c", "fsl-i2c";
43 reg = <0x3100 0x100>;
44 interrupts = <43 2>;
45 interrupt-parent = <&mpic>;
46 clock-frequency = <400000>;
47 };
48
diff --git a/Documentation/powerpc/dts-bindings/fsl/mcm.txt b/Documentation/powerpc/dts-bindings/fsl/mcm.txt
new file mode 100644
index 000000000000..4ceda9b3b413
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/fsl/mcm.txt
@@ -0,0 +1,64 @@
1=====================================================================
2MPX LAW & Coherency Module Device Tree Binding
3Copyright (C) 2009 Freescale Semiconductor Inc.
4=====================================================================
5
6Local Access Window (LAW) Node
7
8The LAW node represents the region of CCSR space where local access
9windows are configured. For MCM based devices this is the first 4k
10of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
11number of local access windows as specified by fsl,num-laws.
12
13PROPERTIES
14
15 - compatible
16 Usage: required
17 Value type: <string>
18 Definition: Must include "fsl,mcm-law"
19
20 - reg
21 Usage: required
22 Value type: <prop-encoded-array>
23 Definition: A standard property. The value specifies the
24 physical address offset and length of the CCSR space
25 registers.
26
27 - fsl,num-laws
28 Usage: required
29 Value type: <u32>
30 Definition: The value specifies the number of local access
31 windows for this device.
32
33=====================================================================
34
35MPX Coherency Module Node
36
37The MPX LAW node represents the region of CCSR space where MCM config
38and error reporting registers exist, this is the second 4k (0x1000)
39of CCSR space.
40
41PROPERTIES
42
43 - compatible
44 Usage: required
45 Value type: <string>
46 Definition: Must include "fsl,CHIP-mcm", "fsl,mcm" where
47 CHIP is the processor (mpc8641, mpc8610, etc.)
48
49 - reg
50 Usage: required
51 Value type: <prop-encoded-array>
52 Definition: A standard property. The value specifies the
53 physical address offset and length of the CCSR space
54 registers.
55
56 - interrupts
57 Usage: required
58 Value type: <prop-encoded-array>
59
60 - interrupt-parent
61 Usage: required
62 Value type: <phandle>
63
64=====================================================================
diff --git a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
index b26b91992c55..bcc30bac6831 100644
--- a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
@@ -1,6 +1,6 @@
1* Freescale MSI interrupt controller 1* Freescale MSI interrupt controller
2 2
3Reguired properities: 3Required properties:
4- compatible : compatible list, contains 2 entries, 4- compatible : compatible list, contains 2 entries,
5 first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572, 5 first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572,
6 etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on 6 etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on
diff --git a/Documentation/powerpc/dts-bindings/fsl/pmc.txt b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
index 02f6f43ee1b7..07256b7ffcaa 100644
--- a/Documentation/powerpc/dts-bindings/fsl/pmc.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
@@ -15,8 +15,8 @@ Properties:
15 compatible; all statements below that apply to "fsl,mpc8548-pmc" also 15 compatible; all statements below that apply to "fsl,mpc8548-pmc" also
16 apply to "fsl,mpc8641d-pmc". 16 apply to "fsl,mpc8641d-pmc".
17 17
18 Compatibility does not include bit assigments in SCCR/PMCDR/DEVDISR; these 18 Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these
19 bit assigments are indicated via the sleep specifier in each device's 19 bit assignments are indicated via the sleep specifier in each device's
20 sleep property. 20 sleep property.
21 21
22- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource 22- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource
diff --git a/Documentation/powerpc/dts-bindings/fsl/ssi.txt b/Documentation/powerpc/dts-bindings/fsl/ssi.txt
index a2d963998a65..5ff76c9c57d2 100644
--- a/Documentation/powerpc/dts-bindings/fsl/ssi.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/ssi.txt
@@ -4,44 +4,56 @@ The SSI is a serial device that communicates with audio codecs. It can
4be programmed in AC97, I2S, left-justified, or right-justified modes. 4be programmed in AC97, I2S, left-justified, or right-justified modes.
5 5
6Required properties: 6Required properties:
7- compatible : compatible list, containing "fsl,ssi" 7- compatible: Compatible list, contains "fsl,ssi".
8- cell-index : the SSI, <0> = SSI1, <1> = SSI2, and so on 8- cell-index: The SSI, <0> = SSI1, <1> = SSI2, and so on.
9- reg : offset and length of the register set for the device 9- reg: Offset and length of the register set for the device.
10- interrupts : <a b> where a is the interrupt number and b is a 10- interrupts: <a b> where a is the interrupt number and b is a
11 field that represents an encoding of the sense and 11 field that represents an encoding of the sense and
12 level information for the interrupt. This should be 12 level information for the interrupt. This should be
13 encoded based on the information in section 2) 13 encoded based on the information in section 2)
14 depending on the type of interrupt controller you 14 depending on the type of interrupt controller you
15 have. 15 have.
16- interrupt-parent : the phandle for the interrupt controller that 16- interrupt-parent: The phandle for the interrupt controller that
17 services interrupts for this device. 17 services interrupts for this device.
18- fsl,mode : the operating mode for the SSI interface 18- fsl,mode: The operating mode for the SSI interface.
19 "i2s-slave" - I2S mode, SSI is clock slave 19 "i2s-slave" - I2S mode, SSI is clock slave
20 "i2s-master" - I2S mode, SSI is clock master 20 "i2s-master" - I2S mode, SSI is clock master
21 "lj-slave" - left-justified mode, SSI is clock slave 21 "lj-slave" - left-justified mode, SSI is clock slave
22 "lj-master" - l.j. mode, SSI is clock master 22 "lj-master" - l.j. mode, SSI is clock master
23 "rj-slave" - right-justified mode, SSI is clock slave 23 "rj-slave" - right-justified mode, SSI is clock slave
24 "rj-master" - r.j., SSI is clock master 24 "rj-master" - r.j., SSI is clock master
25 "ac97-slave" - AC97 mode, SSI is clock slave 25 "ac97-slave" - AC97 mode, SSI is clock slave
26 "ac97-master" - AC97 mode, SSI is clock master 26 "ac97-master" - AC97 mode, SSI is clock master
27- fsl,playback-dma: phandle to a node for the DMA channel to use for 27- fsl,playback-dma: Phandle to a node for the DMA channel to use for
28 playback of audio. This is typically dictated by SOC 28 playback of audio. This is typically dictated by SOC
29 design. See the notes below. 29 design. See the notes below.
30- fsl,capture-dma: phandle to a node for the DMA channel to use for 30- fsl,capture-dma: Phandle to a node for the DMA channel to use for
31 capture (recording) of audio. This is typically dictated 31 capture (recording) of audio. This is typically dictated
32 by SOC design. See the notes below. 32 by SOC design. See the notes below.
33- fsl,fifo-depth: The number of elements in the transmit and receive FIFOs.
34 This number is the maximum allowed value for SFCSR[TFWM0].
35- fsl,ssi-asynchronous:
36 If specified, the SSI is to be programmed in asynchronous
37 mode. In this mode, pins SRCK, STCK, SRFS, and STFS must
38 all be connected to valid signals. In synchronous mode,
39 SRCK and SRFS are ignored. Asynchronous mode allows
40 playback and capture to use different sample sizes and
41 sample rates. Some drivers may require that SRCK and STCK
42 be connected together, and SRFS and STFS be connected
43 together. This would still allow different sample sizes,
44 but not different sample rates.
33 45
34Optional properties: 46Optional properties:
35- codec-handle : phandle to a 'codec' node that defines an audio 47- codec-handle: Phandle to a 'codec' node that defines an audio
36 codec connected to this SSI. This node is typically 48 codec connected to this SSI. This node is typically
37 a child of an I2C or other control node. 49 a child of an I2C or other control node.
38 50
39Child 'codec' node required properties: 51Child 'codec' node required properties:
40- compatible : compatible list, contains the name of the codec 52- compatible: Compatible list, contains the name of the codec
41 53
42Child 'codec' node optional properties: 54Child 'codec' node optional properties:
43- clock-frequency : The frequency of the input clock, which typically 55- clock-frequency: The frequency of the input clock, which typically comes
44 comes from an on-board dedicated oscillator. 56 from an on-board dedicated oscillator.
45 57
46Notes on fsl,playback-dma and fsl,capture-dma: 58Notes on fsl,playback-dma and fsl,capture-dma:
47 59
diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt
index 7fa4b27574b5..edb7ae19e868 100644
--- a/Documentation/powerpc/dts-bindings/fsl/tsec.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt
@@ -56,6 +56,12 @@ Properties:
56 hardware. 56 hardware.
57 - fsl,magic-packet : If present, indicates that the hardware supports 57 - fsl,magic-packet : If present, indicates that the hardware supports
58 waking up via magic packet. 58 waking up via magic packet.
59 - bd-stash : If present, indicates that the hardware supports stashing
60 buffer descriptors in the L2.
61 - rx-stash-len : Denotes the number of bytes of a received buffer to stash
62 in the L2.
63 - rx-stash-idx : Denotes the index of the first byte from the received
64 buffer to stash in the L2.
59 65
60Example: 66Example:
61 ethernet@24000 { 67 ethernet@24000 {
diff --git a/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt
index 84a04d5eb8e6..a48b2cadc7f0 100644
--- a/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/upm-nand.txt
@@ -5,9 +5,21 @@ Required properties:
5- reg : should specify localbus chip select and size used for the chip. 5- reg : should specify localbus chip select and size used for the chip.
6- fsl,upm-addr-offset : UPM pattern offset for the address latch. 6- fsl,upm-addr-offset : UPM pattern offset for the address latch.
7- fsl,upm-cmd-offset : UPM pattern offset for the command latch. 7- fsl,upm-cmd-offset : UPM pattern offset for the command latch.
8- gpios : may specify optional GPIO connected to the Ready-Not-Busy pin.
9 8
10Example: 9Optional properties:
10- fsl,upm-wait-flags : add chip-dependent short delays after running the
11 UPM pattern (0x1), after writing a data byte (0x2) or after
12 writing out a buffer (0x4).
13- fsl,upm-addr-line-cs-offsets : address offsets for multi-chip support.
14 The corresponding address lines are used to select the chip.
15- gpios : may specify optional GPIOs connected to the Ready-Not-Busy pins
16 (R/B#). For multi-chip devices, "n" GPIO definitions are required
17 according to the number of chips.
18- chip-delay : chip dependent delay for transfering data from array to
19 read registers (tR). Required if property "gpios" is not used
20 (R/B# pins not connected).
21
22Examples:
11 23
12upm@1,0 { 24upm@1,0 {
13 compatible = "fsl,upm-nand"; 25 compatible = "fsl,upm-nand";
@@ -26,3 +38,26 @@ upm@1,0 {
26 }; 38 };
27 }; 39 };
28}; 40};
41
42upm@3,0 {
43 #address-cells = <0>;
44 #size-cells = <0>;
45 compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand";
46 reg = <3 0x0 0x800>;
47 fsl,upm-addr-offset = <0x10>;
48 fsl,upm-cmd-offset = <0x08>;
49 /* Multi-chip NAND device */
50 fsl,upm-addr-line-cs-offsets = <0x0 0x200>;
51 fsl,upm-wait-flags = <0x5>;
52 chip-delay = <25>; // in micro-seconds
53
54 nand@0 {
55 #address-cells = <1>;
56 #size-cells = <1>;
57
58 partition@0 {
59 label = "fs";
60 reg = <0x00000000 0x10000000>;
61 };
62 };
63};
diff --git a/Documentation/powerpc/dts-bindings/gpio/led.txt b/Documentation/powerpc/dts-bindings/gpio/led.txt
index ff51f4c0fa9d..4fe14deedc0a 100644
--- a/Documentation/powerpc/dts-bindings/gpio/led.txt
+++ b/Documentation/powerpc/dts-bindings/gpio/led.txt
@@ -1,15 +1,43 @@
1LED connected to GPIO 1LEDs connected to GPIO lines
2 2
3Required properties: 3Required properties:
4- compatible : should be "gpio-led". 4- compatible : should be "gpio-leds".
5- label : (optional) the label for this LED. If omitted, the label is 5
6Each LED is represented as a sub-node of the gpio-leds device. Each
7node's name represents the name of the corresponding LED.
8
9LED sub-node properties:
10- gpios : Should specify the LED's GPIO, see "Specifying GPIO information
11 for devices" in Documentation/powerpc/booting-without-of.txt. Active
12 low LEDs should be indicated using flags in the GPIO specifier.
13- label : (optional) The label for this LED. If omitted, the label is
6 taken from the node name (excluding the unit address). 14 taken from the node name (excluding the unit address).
7- gpios : should specify LED GPIO. 15- linux,default-trigger : (optional) This parameter, if present, is a
16 string defining the trigger assigned to the LED. Current triggers are:
17 "backlight" - LED will act as a back-light, controlled by the framebuffer
18 system
19 "default-on" - LED will turn on
20 "heartbeat" - LED "double" flashes at a load average based rate
21 "ide-disk" - LED indicates disk activity
22 "timer" - LED flashes at a fixed, configurable rate
8 23
9Example: 24Examples:
10 25
11led@0 { 26leds {
12 compatible = "gpio-led"; 27 compatible = "gpio-leds";
13 label = "hdd"; 28 hdd {
14 gpios = <&mcu_pio 0 1>; 29 label = "IDE Activity";
30 gpios = <&mcu_pio 0 1>; /* Active low */
31 linux,default-trigger = "ide-disk";
32 };
15}; 33};
34
35run-control {
36 compatible = "gpio-leds";
37 red {
38 gpios = <&mpc8572 6 0>;
39 };
40 green {
41 gpios = <&mpc8572 7 0>;
42 };
43}
diff --git a/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt
new file mode 100644
index 000000000000..c39ac2891951
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/mmc-spi-slot.txt
@@ -0,0 +1,23 @@
1MMC/SD/SDIO slot directly connected to a SPI bus
2
3Required properties:
4- compatible : should be "mmc-spi-slot".
5- reg : should specify SPI address (chip-select number).
6- spi-max-frequency : maximum frequency for this device (Hz).
7- voltage-ranges : two cells are required, first cell specifies minimum
8 slot voltage (mV), second cell specifies maximum slot voltage (mV).
9 Several ranges could be specified.
10- gpios : (optional) may specify GPIOs in this order: Card-Detect GPIO,
11 Write-Protect GPIO.
12
13Example:
14
15 mmc-slot@0 {
16 compatible = "fsl,mpc8323rdb-mmc-slot",
17 "mmc-spi-slot";
18 reg = <0>;
19 gpios = <&qe_pio_d 14 1
20 &qe_pio_d 15 0>;
21 voltage-ranges = <3300 3300>;
22 spi-max-frequency = <50000000>;
23 };
diff --git a/Documentation/powerpc/dts-bindings/mtd-physmap.txt b/Documentation/powerpc/dts-bindings/mtd-physmap.txt
new file mode 100644
index 000000000000..667c9bde8699
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/mtd-physmap.txt
@@ -0,0 +1,80 @@
1CFI or JEDEC memory-mapped NOR flash
2
3Flash chips (Memory Technology Devices) are often used for solid state
4file systems on embedded devices.
5
6 - compatible : should contain the specific model of flash chip(s)
7 used, if known, followed by either "cfi-flash" or "jedec-flash"
8 - reg : Address range(s) of the flash chip(s)
9 It's possible to (optionally) define multiple "reg" tuples so that
10 non-identical NOR chips can be described in one flash node.
11 - bank-width : Width (in bytes) of the flash bank. Equal to the
12 device width times the number of interleaved chips.
13 - device-width : (optional) Width of a single flash chip. If
14 omitted, assumed to be equal to 'bank-width'.
15 - #address-cells, #size-cells : Must be present if the flash has
16 sub-nodes representing partitions (see below). In this case
17 both #address-cells and #size-cells must be equal to 1.
18
19For JEDEC compatible devices, the following additional properties
20are defined:
21
22 - vendor-id : Contains the flash chip's vendor id (1 byte).
23 - device-id : Contains the flash chip's device id (1 byte).
24
25In addition to the information on the flash bank itself, the
26device tree may optionally contain additional information
27describing partitions of the flash address space. This can be
28used on platforms which have strong conventions about which
29portions of the flash are used for what purposes, but which don't
30use an on-flash partition table such as RedBoot.
31
32Each partition is represented as a sub-node of the flash device.
33Each node's name represents the name of the corresponding
34partition of the flash device.
35
36Flash partitions
37 - reg : The partition's offset and size within the flash bank.
38 - label : (optional) The label / name for this flash partition.
39 If omitted, the label is taken from the node name (excluding
40 the unit address).
41 - read-only : (optional) This parameter, if present, is a hint to
42 Linux that this flash partition should only be mounted
43 read-only. This is usually used for flash partitions
44 containing early-boot firmware images or data which should not
45 be clobbered.
46
47Example:
48
49 flash@ff000000 {
50 compatible = "amd,am29lv128ml", "cfi-flash";
51 reg = <ff000000 01000000>;
52 bank-width = <4>;
53 device-width = <1>;
54 #address-cells = <1>;
55 #size-cells = <1>;
56 fs@0 {
57 label = "fs";
58 reg = <0 f80000>;
59 };
60 firmware@f80000 {
61 label ="firmware";
62 reg = <f80000 80000>;
63 read-only;
64 };
65 };
66
67Here an example with multiple "reg" tuples:
68
69 flash@f0000000,0 {
70 #address-cells = <1>;
71 #size-cells = <1>;
72 compatible = "intel,PC48F4400P0VB", "cfi-flash";
73 reg = <0 0x00000000 0x02000000
74 0 0x02000000 0x02000000>;
75 bank-width = <2>;
76 partition@0 {
77 label = "test-part1";
78 reg = <0 0x04000000>;
79 };
80 };
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
index 06da4d4b44f9..2031ddb33d09 100644
--- a/Documentation/powerpc/qe_firmware.txt
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -225,7 +225,7 @@ For example, to match the 8323, revision 1.0:
225 soc.major = 1 225 soc.major = 1
226 soc.minor = 0 226 soc.minor = 0
227 227
228'padding' is neccessary for structure alignment. This field ensures that the 228'padding' is necessary for structure alignment. This field ensures that the
229'extended_modes' field is aligned on a 64-bit boundary. 229'extended_modes' field is aligned on a 64-bit boundary.
230 230
231'extended_modes' is a bitfield that defines special functionality which has an 231'extended_modes' is a bitfield that defines special functionality which has an
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
new file mode 100644
index 000000000000..125f4ab48998
--- /dev/null
+++ b/Documentation/pps/pps.txt
@@ -0,0 +1,172 @@
1
2 PPS - Pulse Per Second
3 ----------------------
4
5(C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com>
6
7This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17
18
19Overview
20--------
21
22LinuxPPS provides a programming interface (API) to define in the
23system several PPS sources.
24
25PPS means "pulse per second" and a PPS source is just a device which
26provides a high precision signal each second so that an application
27can use it to adjust system clock time.
28
29A PPS source can be connected to a serial port (usually to the Data
30Carrier Detect pin) or to a parallel port (ACK-pin) or to a special
31CPU's GPIOs (this is the common case in embedded systems) but in each
32case when a new pulse arrives the system must apply to it a timestamp
33and record it for userland.
34
35Common use is the combination of the NTPD as userland program, with a
36GPS receiver as PPS source, to obtain a wallclock-time with
37sub-millisecond synchronisation to UTC.
38
39
40RFC considerations
41------------------
42
43While implementing a PPS API as RFC 2783 defines and using an embedded
44CPU GPIO-Pin as physical link to the signal, I encountered a deeper
45problem:
46
47 At startup it needs a file descriptor as argument for the function
48 time_pps_create().
49
50This implies that the source has a /dev/... entry. This assumption is
51ok for the serial and parallel port, where you can do something
52useful besides(!) the gathering of timestamps as it is the central
53task for a PPS-API. But this assumption does not work for a single
54purpose GPIO line. In this case even basic file-related functionality
55(like read() and write()) makes no sense at all and should not be a
56precondition for the use of a PPS-API.
57
58The problem can be simply solved if you consider that a PPS source is
59not always connected with a GPS data source.
60
61So your programs should check if the GPS data source (the serial port
62for instance) is a PPS source too, and if not they should provide the
63possibility to open another device as PPS source.
64
65In LinuxPPS the PPS sources are simply char devices usually mapped
66into files /dev/pps0, /dev/pps1, etc..
67
68
69Coding example
70--------------
71
72To register a PPS source into the kernel you should define a struct
73pps_source_info_s as follows:
74
75 static struct pps_source_info pps_ktimer_info = {
76 .name = "ktimer",
77 .path = "",
78 .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | \
79 PPS_ECHOASSERT | \
80 PPS_CANWAIT | PPS_TSFMT_TSPEC,
81 .echo = pps_ktimer_echo,
82 .owner = THIS_MODULE,
83 };
84
85and then calling the function pps_register_source() in your
86intialization routine as follows:
87
88 source = pps_register_source(&pps_ktimer_info,
89 PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
90
91The pps_register_source() prototype is:
92
93 int pps_register_source(struct pps_source_info_s *info, int default_params)
94
95where "info" is a pointer to a structure that describes a particular
96PPS source, "default_params" tells the system what the initial default
97parameters for the device should be (it is obvious that these parameters
98must be a subset of ones defined in the struct
99pps_source_info_s which describe the capabilities of the driver).
100
101Once you have registered a new PPS source into the system you can
102signal an assert event (for example in the interrupt handler routine)
103just using:
104
105 pps_event(source, &ts, PPS_CAPTUREASSERT, ptr)
106
107where "ts" is the event's timestamp.
108
109The same function may also run the defined echo function
110(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user
111asked for that... etc..
112
113Please see the file drivers/pps/clients/ktimer.c for example code.
114
115
116SYSFS support
117-------------
118
119If the SYSFS filesystem is enabled in the kernel it provides a new class:
120
121 $ ls /sys/class/pps/
122 pps0/ pps1/ pps2/
123
124Every directory is the ID of a PPS sources defined in the system and
125inside you find several files:
126
127 $ ls /sys/class/pps/pps0/
128 assert clear echo mode name path subsystem@ uevent
129
130Inside each "assert" and "clear" file you can find the timestamp and a
131sequence number:
132
133 $ cat /sys/class/pps/pps0/assert
134 1170026870.983207967#8
135
136Where before the "#" is the timestamp in seconds; after it is the
137sequence number. Other files are:
138
139* echo: reports if the PPS source has an echo function or not;
140
141* mode: reports available PPS functioning modes;
142
143* name: reports the PPS source's name;
144
145* path: reports the PPS source's device path, that is the device the
146 PPS source is connected to (if it exists).
147
148
149Testing the PPS support
150-----------------------
151
152In order to test the PPS support even without specific hardware you can use
153the ktimer driver (see the client subsection in the PPS configuration menu)
154and the userland tools provided into Documentaion/pps/ directory.
155
156Once you have enabled the compilation of ktimer just modprobe it (if
157not statically compiled):
158
159 # modprobe ktimer
160
161and the run ppstest as follow:
162
163 $ ./ppstest /dev/pps0
164 trying PPS source "/dev/pps1"
165 found PPS source "/dev/pps1"
166 ok, found 1 source(s), now start fetching data...
167 source 0 - assert 1186592699.388832443, sequence: 364 - clear 0.000000000, sequence: 0
168 source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0
169 source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0
170
171Please, note that to compile userland programs you need the file timepps.h
172(see Documentation/pps/).
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 7224459b469e..aae8355d3166 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -131,8 +131,8 @@ Example:
131 } 131 }
132 132
133 /* Add new node and rebalance tree. */ 133 /* Add new node and rebalance tree. */
134 rb_link_node(data->node, parent, new); 134 rb_link_node(&data->node, parent, new);
135 rb_insert_color(data->node, root); 135 rb_insert_color(&data->node, root);
136 136
137 return TRUE; 137 return TRUE;
138 } 138 }
@@ -146,10 +146,10 @@ To remove an existing node from a tree, call:
146 146
147Example: 147Example:
148 148
149 struct mytype *data = mysearch(mytree, "walrus"); 149 struct mytype *data = mysearch(&mytree, "walrus");
150 150
151 if (data) { 151 if (data) {
152 rb_erase(data->node, mytree); 152 rb_erase(&data->node, &mytree);
153 myfree(data); 153 myfree(data);
154 } 154 }
155 155
@@ -188,5 +188,5 @@ Example:
188 188
189 struct rb_node *node; 189 struct rb_node *node;
190 for (node = rb_first(&mytree); node; node = rb_next(node)) 190 for (node = rb_first(&mytree); node; node = rb_next(node))
191 printk("key=%s\n", rb_entry(node, int, keystring)); 191 printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
192 192
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 4d3ee317a4a3..b4860509c319 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -1,575 +1,139 @@
1rfkill - RF switch subsystem support 1rfkill - RF kill switch support
2==================================== 2===============================
3 3
41 Introduction 41. Introduction
52 Implementation details 52. Implementation details
63 Kernel driver guidelines 63. Kernel API
73.1 wireless device drivers 74. Userspace support
83.2 platform/switch drivers
93.3 input device drivers
104 Kernel API
115 Userspace support
12 8
13 9
141. Introduction: 101. Introduction
15 11
16The rfkill switch subsystem exists to add a generic interface to circuitry that 12The rfkill subsystem provides a generic interface to disabling any radio
17can enable or disable the signal output of a wireless *transmitter* of any 13transmitter in the system. When a transmitter is blocked, it shall not
18type. By far, the most common use is to disable radio-frequency transmitters. 14radiate any power.
19 15
20Note that disabling the signal output means that the the transmitter is to be 16The subsystem also provides the ability to react on button presses and
21made to not emit any energy when "blocked". rfkill is not about blocking data 17disable all transmitters of a certain type (or all). This is intended for
22transmissions, it is about blocking energy emission. 18situations where transmitters need to be turned off, for example on
19aircraft.
23 20
24The rfkill subsystem offers support for keys and switches often found on 21The rfkill subsystem has a concept of "hard" and "soft" block, which
25laptops to enable wireless devices like WiFi and Bluetooth, so that these keys 22differ little in their meaning (block == transmitters off) but rather in
26and switches actually perform an action in all wireless devices of a given type 23whether they can be changed or not:
27attached to the system. 24 - hard block: read-only radio block that cannot be overriden by software
25 - soft block: writable radio block (need not be readable) that is set by
26 the system software.
28 27
29The buttons to enable and disable the wireless transmitters are important in
30situations where the user is for example using his laptop on a location where
31radio-frequency transmitters _must_ be disabled (e.g. airplanes).
32 28
33Because of this requirement, userspace support for the keys should not be made 292. Implementation details
34mandatory. Because userspace might want to perform some additional smarter
35tasks when the key is pressed, rfkill provides userspace the possibility to
36take over the task to handle the key events.
37 30
38=============================================================================== 31The rfkill subsystem is composed of three main components:
392: Implementation details 32 * the rfkill core,
33 * the deprecated rfkill-input module (an input layer handler, being
34 replaced by userspace policy code) and
35 * the rfkill drivers.
40 36
41The rfkill subsystem is composed of various components: the rfkill class, the 37The rfkill core provides API for kernel drivers to register their radio
42rfkill-input module (an input layer handler), and some specific input layer 38transmitter with the kernel, methods for turning it on and off and, letting
43events. 39the system know about hardware-disabled states that may be implemented on
40the device.
44 41
45The rfkill class provides kernel drivers with an interface that allows them to 42The rfkill core code also notifies userspace of state changes, and provides
46know when they should enable or disable a wireless network device transmitter. 43ways for userspace to query the current states. See the "Userspace support"
47This is enabled by the CONFIG_RFKILL Kconfig option. 44section below.
48 45
49The rfkill class support makes sure userspace will be notified of all state 46When the device is hard-blocked (either by a call to rfkill_set_hw_state()
50changes on rfkill devices through uevents. It provides a notification chain 47or from query_hw_block) set_block() will be invoked for additional software
51for interested parties in the kernel to also get notified of rfkill state 48block, but drivers can ignore the method call since they can use the return
52changes in other drivers. It creates several sysfs entries which can be used 49value of the function rfkill_set_hw_state() to sync the software state
53by userspace. See section "Userspace support". 50instead of keeping track of calls to set_block(). In fact, drivers should
54 51use the return value of rfkill_set_hw_state() unless the hardware actually
55The rfkill-input module provides the kernel with the ability to implement a 52keeps track of soft and hard block separately.
56basic response when the user presses a key or button (or toggles a switch)
57related to rfkill functionality. It is an in-kernel implementation of default
58policy of reacting to rfkill-related input events and neither mandatory nor
59required for wireless drivers to operate. It is enabled by the
60CONFIG_RFKILL_INPUT Kconfig option.
61
62rfkill-input is a rfkill-related events input layer handler. This handler will
63listen to all rfkill key events and will change the rfkill state of the
64wireless devices accordingly. With this option enabled userspace could either
65do nothing or simply perform monitoring tasks.
66
67The rfkill-input module also provides EPO (emergency power-off) functionality
68for all wireless transmitters. This function cannot be overridden, and it is
69always active. rfkill EPO is related to *_RFKILL_ALL input layer events.
70
71
72Important terms for the rfkill subsystem:
73
74In order to avoid confusion, we avoid the term "switch" in rfkill when it is
75referring to an electronic control circuit that enables or disables a
76transmitter. We reserve it for the physical device a human manipulates
77(which is an input device, by the way):
78
79rfkill switch:
80
81 A physical device a human manipulates. Its state can be perceived by
82 the kernel either directly (through a GPIO pin, ACPI GPE) or by its
83 effect on a rfkill line of a wireless device.
84
85rfkill controller:
86
87 A hardware circuit that controls the state of a rfkill line, which a
88 kernel driver can interact with *to modify* that state (i.e. it has
89 either write-only or read/write access).
90
91rfkill line:
92
93 An input channel (hardware or software) of a wireless device, which
94 causes a wireless transmitter to stop emitting energy (BLOCK) when it
95 is active. Point of view is extremely important here: rfkill lines are
96 always seen from the PoV of a wireless device (and its driver).
97
98soft rfkill line/software rfkill line:
99
100 A rfkill line the wireless device driver can directly change the state
101 of. Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED.
102
103hard rfkill line/hardware rfkill line:
104
105 A rfkill line that works fully in hardware or firmware, and that cannot
106 be overridden by the kernel driver. The hardware device or the
107 firmware just exports its status to the driver, but it is read-only.
108 Related to rfkill_state RFKILL_STATE_HARD_BLOCKED.
109
110The enum rfkill_state describes the rfkill state of a transmitter:
111
112When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state,
113the wireless transmitter (radio TX circuit for example) is *enabled*. When the
114it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the
115wireless transmitter is to be *blocked* from operating.
116
117RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change
118that state. RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio()
119will not be able to change the state and will return with a suitable error if
120attempts are made to set the state to RFKILL_STATE_UNBLOCKED.
121
122RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is
123locked in the BLOCKED state by a hardwire rfkill line (typically an input pin
124that, when active, forces the transmitter to be disabled) which the driver
125CANNOT override.
126
127Full rfkill functionality requires two different subsystems to cooperate: the
128input layer and the rfkill class. The input layer issues *commands* to the
129entire system requesting that devices registered to the rfkill class change
130state. The way this interaction happens is not complex, but it is not obvious
131either:
132
133Kernel Input layer:
134
135 * Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and
136 other such events when the user presses certain keys, buttons, or
137 toggles certain physical switches.
138
139 THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE
140 KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT. It is
141 used to issue *commands* for the system to change behaviour, and these
142 commands may or may not be carried out by some kernel driver or
143 userspace application. It follows that doing user feedback based only
144 on input events is broken, as there is no guarantee that an input event
145 will be acted upon.
146
147 Most wireless communication device drivers implementing rfkill
148 functionality MUST NOT generate these events, and have no reason to
149 register themselves with the input layer. Doing otherwise is a common
150 misconception. There is an API to propagate rfkill status change
151 information, and it is NOT the input layer.
152
153rfkill class:
154
155 * Calls a hook in a driver to effectively change the wireless
156 transmitter state;
157 * Keeps track of the wireless transmitter state (with help from
158 the driver);
159 * Generates userspace notifications (uevents) and a call to a
160 notification chain (kernel) when there is a wireless transmitter
161 state change;
162 * Connects a wireless communications driver with the common rfkill
163 control system, which, for example, allows actions such as
164 "switch all bluetooth devices offline" to be carried out by
165 userspace or by rfkill-input.
166
167 THE RFKILL CLASS NEVER ISSUES INPUT EVENTS. THE RFKILL CLASS DOES
168 NOT LISTEN TO INPUT EVENTS. NO DRIVER USING THE RFKILL CLASS SHALL
169 EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS. Doing otherwise is
170 a layering violation.
171
172 Most wireless data communication drivers in the kernel have just to
173 implement the rfkill class API to work properly. Interfacing to the
174 input layer is not often required (and is very often a *bug*) on
175 wireless drivers.
176
177 Platform drivers often have to attach to the input layer to *issue*
178 (but never to listen to) rfkill events for rfkill switches, and also to
179 the rfkill class to export a control interface for the platform rfkill
180 controllers to the rfkill subsystem. This does NOT mean the rfkill
181 switch is attached to a rfkill class (doing so is almost always wrong).
182 It just means the same kernel module is the driver for different
183 devices (rfkill switches and rfkill controllers).
184
185
186Userspace input handlers (uevents) or kernel input handlers (rfkill-input):
187
188 * Implements the policy of what should happen when one of the input
189 layer events related to rfkill operation is received.
190 * Uses the sysfs interface (userspace) or private rfkill API calls
191 to tell the devices registered with the rfkill class to change
192 their state (i.e. translates the input layer event into real
193 action).
194
195 * rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0
196 (power off all transmitters) in a special way: it ignores any
197 overrides and local state cache and forces all transmitters to the
198 RFKILL_STATE_SOFT_BLOCKED state (including those which are already
199 supposed to be BLOCKED).
200 * rfkill EPO will remain active until rfkill-input receives an
201 EV_SW SW_RFKILL_ALL 1 event. While the EPO is active, transmitters
202 are locked in the blocked state (rfkill will refuse to unblock them).
203 * rfkill-input implements different policies that the user can
204 select for handling EV_SW SW_RFKILL_ALL 1. It will unlock rfkill,
205 and either do nothing (leave transmitters blocked, but now unlocked),
206 restore the transmitters to their state before the EPO, or unblock
207 them all.
208
209Userspace uevent handler or kernel platform-specific drivers hooked to the
210rfkill notifier chain:
211
212 * Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents,
213 in order to know when a device that is registered with the rfkill
214 class changes state;
215 * Issues feedback notifications to the user;
216 * In the rare platforms where this is required, synthesizes an input
217 event to command all *OTHER* rfkill devices to also change their
218 statues when a specific rfkill device changes state.
219
220
221===============================================================================
2223: Kernel driver guidelines
223
224Remember: point-of-view is everything for a driver that connects to the rfkill
225subsystem. All the details below must be measured/perceived from the point of
226view of the specific driver being modified.
227
228The first thing one needs to know is whether his driver should be talking to
229the rfkill class or to the input layer. In rare cases (platform drivers), it
230could happen that you need to do both, as platform drivers often handle a
231variety of devices in the same driver.
232
233Do not mistake input devices for rfkill controllers. The only type of "rfkill
234switch" device that is to be registered with the rfkill class are those
235directly controlling the circuits that cause a wireless transmitter to stop
236working (or the software equivalent of them), i.e. what we call a rfkill
237controller. Every other kind of "rfkill switch" is just an input device and
238MUST NOT be registered with the rfkill class.
239
240A driver should register a device with the rfkill class when ALL of the
241following conditions are met (they define a rfkill controller):
242
2431. The device is/controls a data communications wireless transmitter;
244
2452. The kernel can interact with the hardware/firmware to CHANGE the wireless
246 transmitter state (block/unblock TX operation);
247
2483. The transmitter can be made to not emit any energy when "blocked":
249 rfkill is not about blocking data transmissions, it is about blocking
250 energy emission;
251
252A driver should register a device with the input subsystem to issue
253rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX,
254SW_RFKILL_ALL, etc) when ALL of the folowing conditions are met:
255
2561. It is directly related to some physical device the user interacts with, to
257 command the O.S./firmware/hardware to enable/disable a data communications
258 wireless transmitter.
259
260 Examples of the physical device are: buttons, keys and switches the user
261 will press/touch/slide/switch to enable or disable the wireless
262 communication device.
263
2642. It is NOT slaved to another device, i.e. there is no other device that
265 issues rfkill-related input events in preference to this one.
266
267 Please refer to the corner cases and examples section for more details.
268
269When in doubt, do not issue input events. For drivers that should generate
270input events in some platforms, but not in others (e.g. b43), the best solution
271is to NEVER generate input events in the first place. That work should be
272deferred to a platform-specific kernel module (which will know when to generate
273events through the rfkill notifier chain) or to userspace. This avoids the
274usual maintenance problems with DMI whitelisting.
275 53
276 54
277Corner cases and examples: 553. Kernel API
278====================================
279
2801. If the device is an input device that, because of hardware or firmware,
281causes wireless transmitters to be blocked regardless of the kernel's will, it
282is still just an input device, and NOT to be registered with the rfkill class.
283 56
2842. If the wireless transmitter switch control is read-only, it is an input
285device and not to be registered with the rfkill class (and maybe not to be made
286an input layer event source either, see below).
287 57
2883. If there is some other device driver *closer* to the actual hardware the 58Drivers for radio transmitters normally implement an rfkill driver.
289user interacted with (the button/switch/key) to issue an input event, THAT is
290the device driver that should be issuing input events.
291
292E.g:
293 [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input]
294 (platform driver) (wireless card driver)
295
296The user is closer to the RFKILL slide switch plaform driver, so the driver
297which must issue input events is the platform driver looking at the GPIO
298hardware, and NEVER the wireless card driver (which is just a slave). It is
299very likely that there are other leaves than just the WLAN card rf-kill input
300(e.g. a bluetooth card, etc)...
301 59
302On the other hand, some embedded devices do this: 60Platform drivers might implement input devices if the rfkill button is just
61that, a button. If that button influences the hardware then you need to
62implement an rfkill driver instead. This also applies if the platform provides
63a way to turn on/off the transmitter(s).
303 64
304 [RFKILL slider switch] -- [WLAN card rf-kill input] 65For some platforms, it is possible that the hardware state changes during
305 (wireless card driver) 66suspend/hibernation, in which case it will be necessary to update the rfkill
67core with the current state is at resume time.
306 68
307In this situation, the wireless card driver *could* register itself as an input 69To create an rfkill driver, driver's Kconfig needs to have
308device and issue rf-kill related input events... but in order to AVOID the need
309for DMI whitelisting, the wireless card driver does NOT do it. Userspace (HAL)
310or a platform driver (that exists only on these embedded devices) will do the
311dirty job of issuing the input events.
312 70
71 depends on RFKILL || !RFKILL
313 72
314COMMON MISTAKES in kernel drivers, related to rfkill: 73to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL
315==================================== 74case allows the driver to be built when rfkill is not configured, which which
75case all rfkill API can still be used but will be provided by static inlines
76which compile to almost nothing.
316 77
3171. NEVER confuse input device keys and buttons with input device switches. 78Calling rfkill_set_hw_state() when a state change happens is required from
79rfkill drivers that control devices that can be hard-blocked unless they also
80assign the poll_hw_block() callback (then the rfkill core will poll the
81device). Don't do this unless you cannot get the event in any other way.
318 82
319 1a. Switches are always set or reset. They report the current state
320 (on position or off position).
321 83
322 1b. Keys and buttons are either in the pressed or not-pressed state, and
323 that's it. A "button" that latches down when you press it, and
324 unlatches when you press it again is in fact a switch as far as input
325 devices go.
326 84
327Add the SW_* events you need for switches, do NOT try to emulate a button using 855. Userspace support
328KEY_* events just because there is no such SW_* event yet. Do NOT try to use,
329for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead.
330 86
3312. Input device switches (sources of EV_SW events) DO store their current state 87The recommended userspace interface to use is /dev/rfkill, which is a misc
332(so you *must* initialize it by issuing a gratuitous input layer event on 88character device that allows userspace to obtain and set the state of rfkill
333driver start-up and also when resuming from sleep), and that state CAN be 89devices and sets of devices. It also notifies userspace about device addition
334queried from userspace through IOCTLs. There is no sysfs interface for this, 90and removal. The API is a simple read/write API that is defined in
335but that doesn't mean you should break things trying to hook it to the rfkill 91linux/rfkill.h, with one ioctl that allows turning off the deprecated input
336class to get a sysfs interface :-) 92handler in the kernel for the transition period.
337 93
3383. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the 94Except for the one ioctl, communication with the kernel is done via read()
339correct event for your switch/button. These events are emergency power-off 95and write() of instances of 'struct rfkill_event'. In this structure, the
340events when they are trying to turn the transmitters off. An example of an 96soft and hard block are properly separated (unlike sysfs, see below) and
341input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill 97userspace is able to get a consistent snapshot of all rfkill devices in the
342switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch. 98system. Also, it is possible to switch all rfkill drivers (or all drivers of
343An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by 99a specified type) into a state which also updates the default state for
344default, is any sort of hot key that is type-specific (e.g. the one for WLAN). 100hotplugged devices.
345 101
102After an application opens /dev/rfkill, it can read the current state of
103all devices, and afterwards can poll the descriptor for hotplug or state
104change events.
346 105
3473.1 Guidelines for wireless device drivers 106Applications must ignore operations (the "op" field) they do not handle,
348------------------------------------------ 107this allows the API to be extended in the future.
349 108
350(in this text, rfkill->foo means the foo field of struct rfkill). 109Additionally, each rfkill device is registered in sysfs and there has the
351 110following attributes:
3521. Each independent transmitter in a wireless device (usually there is only one
353transmitter per device) should have a SINGLE rfkill class attached to it.
354
3552. If the device does not have any sort of hardware assistance to allow the
356driver to rfkill the device, the driver should emulate it by taking all actions
357required to silence the transmitter.
358
3593. If it is impossible to silence the transmitter (i.e. it still emits energy,
360even if it is just in brief pulses, when there is no data to transmit and there
361is no hardware support to turn it off) do NOT lie to the users. Do not attach
362it to a rfkill class. The rfkill subsystem does not deal with data
363transmission, it deals with energy emission. If the transmitter is emitting
364energy, it is not blocked in rfkill terms.
365
3664. It doesn't matter if the device has multiple rfkill input lines affecting
367the same transmitter, their combined state is to be exported as a single state
368per transmitter (see rule 1).
369
370This rule exists because users of the rfkill subsystem expect to get (and set,
371when possible) the overall transmitter rfkill state, not of a particular rfkill
372line.
373
3745. The wireless device driver MUST NOT leave the transmitter enabled during
375suspend and hibernation unless:
376
377 5.1. The transmitter has to be enabled for some sort of functionality
378 like wake-on-wireless-packet or autonomous packed forwarding in a mesh
379 network, and that functionality is enabled for this suspend/hibernation
380 cycle.
381
382AND
383
384 5.2. The device was not on a user-requested BLOCKED state before
385 the suspend (i.e. the driver must NOT unblock a device, not even
386 to support wake-on-wireless-packet or remain in the mesh).
387
388In other words, there is absolutely no allowed scenario where a driver can
389automatically take action to unblock a rfkill controller (obviously, this deals
390with scenarios where soft-blocking or both soft and hard blocking is happening.
391Scenarios where hardware rfkill lines are the only ones blocking the
392transmitter are outside of this rule, since the wireless device driver does not
393control its input hardware rfkill lines in the first place).
394
3956. During resume, rfkill will try to restore its previous state.
396
3977. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio
398until it is resumed.
399
400
401Example of a WLAN wireless driver connected to the rfkill subsystem:
402--------------------------------------------------------------------
403
404A certain WLAN card has one input pin that causes it to block the transmitter
405and makes the status of that input pin available (only for reading!) to the
406kernel driver. This is a hard rfkill input line (it cannot be overridden by
407the kernel driver).
408
409The card also has one PCI register that, if manipulated by the driver, causes
410it to block the transmitter. This is a soft rfkill input line.
411
412It has also a thermal protection circuitry that shuts down its transmitter if
413the card overheats, and makes the status of that protection available (only for
414reading!) to the kernel driver. This is also a hard rfkill input line.
415
416If either one of these rfkill lines are active, the transmitter is blocked by
417the hardware and forced offline.
418
419The driver should allocate and attach to its struct device *ONE* instance of
420the rfkill class (there is only one transmitter).
421
422It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if
423either one of its two hard rfkill input lines are active. If the two hard
424rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft
425rfkill input line is active. Only if none of the rfkill input lines are
426active, will it return RFKILL_STATE_UNBLOCKED.
427
428Since the device has a hardware rfkill line, it IS subject to state changes
429external to rfkill. Therefore, the driver must make sure that it calls
430rfkill_force_state() to keep the status always up-to-date, and it must do a
431rfkill_force_state() on resume from sleep.
432
433Every time the driver gets a notification from the card that one of its rfkill
434lines changed state (polling might be needed on badly designed cards that don't
435generate interrupts for such events), it recomputes the rfkill state as per
436above, and calls rfkill_force_state() to update it.
437
438The driver should implement the toggle_radio() hook, that:
439
4401. Returns an error if one of the hardware rfkill lines are active, and the
441caller asked for RFKILL_STATE_UNBLOCKED.
442
4432. Activates the soft rfkill line if the caller asked for state
444RFKILL_STATE_SOFT_BLOCKED. It should do this even if one of the hard rfkill
445lines are active, effectively double-blocking the transmitter.
446
4473. Deactivates the soft rfkill line if none of the hardware rfkill lines are
448active and the caller asked for RFKILL_STATE_UNBLOCKED.
449
450===============================================================================
4514: Kernel API
452
453To build a driver with rfkill subsystem support, the driver should depend on
454(or select) the Kconfig symbol RFKILL; it should _not_ depend on RKFILL_INPUT.
455
456The hardware the driver talks to may be write-only (where the current state
457of the hardware is unknown), or read-write (where the hardware can be queried
458about its current state).
459
460The rfkill class will call the get_state hook of a device every time it needs
461to know the *real* current state of the hardware. This can happen often, but
462it does not do any polling, so it is not enough on hardware that is subject
463to state changes outside of the rfkill subsystem.
464
465Therefore, calling rfkill_force_state() when a state change happens is
466mandatory when the device has a hardware rfkill line, or when something else
467like the firmware could cause its state to be changed without going through the
468rfkill class.
469
470Some hardware provides events when its status changes. In these cases, it is
471best for the driver to not provide a get_state hook, and instead register the
472rfkill class *already* with the correct status, and keep it updated using
473rfkill_force_state() when it gets an event from the hardware.
474
475rfkill_force_state() must be used on the device resume handlers to update the
476rfkill status, should there be any chance of the device status changing during
477the sleep.
478
479There is no provision for a statically-allocated rfkill struct. You must
480use rfkill_allocate() to allocate one.
481
482You should:
483 - rfkill_allocate()
484 - modify rfkill fields (flags, name)
485 - modify state to the current hardware state (THIS IS THE ONLY TIME
486 YOU CAN ACCESS state DIRECTLY)
487 - rfkill_register()
488
489The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through
490a suitable return of get_state() or through rfkill_force_state().
491
492When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch
493it to a different state is through a suitable return of get_state() or through
494rfkill_force_state().
495
496If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED
497when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should
498not return an error. Instead, it should try to double-block the transmitter,
499so that its state will change from RFKILL_STATE_HARD_BLOCKED to
500RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease.
501
502Please refer to the source for more documentation.
503
504===============================================================================
5055: Userspace support
506
507rfkill devices issue uevents (with an action of "change"), with the following
508environment variables set:
509
510RFKILL_NAME
511RFKILL_STATE
512RFKILL_TYPE
513
514The ABI for these variables is defined by the sysfs attributes. It is best
515to take a quick look at the source to make sure of the possible values.
516
517It is expected that HAL will trap those, and bridge them to DBUS, etc. These
518events CAN and SHOULD be used to give feedback to the user about the rfkill
519status of the system.
520
521Input devices may issue events that are related to rfkill. These are the
522various KEY_* events and SW_* events supported by rfkill-input.c.
523
524******IMPORTANT******
525When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL
526SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it
527has set to true the user_claim attribute for that particular switch. This rule
528is *absolute*; do NOT violate it.
529******IMPORTANT******
530
531Userspace must not assume it is the only source of control for rfkill switches.
532Their state CAN and WILL change due to firmware actions, direct user actions,
533and the rfkill-input EPO override for *_RFKILL_ALL.
534
535When rfkill-input is not active, userspace must initiate a rfkill status
536change by writing to the "state" attribute in order for anything to happen.
537
538Take particular care to implement EV_SW SW_RFKILL_ALL properly. When that
539switch is set to OFF, *every* rfkill device *MUST* be immediately put into the
540RFKILL_STATE_SOFT_BLOCKED state, no questions asked.
541
542The following sysfs entries will be created:
543 111
544 name: Name assigned by driver to this key (interface or driver name). 112 name: Name assigned by driver to this key (interface or driver name).
545 type: Name of the key type ("wlan", "bluetooth", etc). 113 type: Driver type string ("wlan", "bluetooth", etc).
114 persistent: Whether the soft blocked state is initialised from
115 non-volatile storage at startup.
546 state: Current state of the transmitter 116 state: Current state of the transmitter
547 0: RFKILL_STATE_SOFT_BLOCKED 117 0: RFKILL_STATE_SOFT_BLOCKED
548 transmitter is forced off, but one can override it 118 transmitter is turned off by software
549 by a write to the state attribute;
550 1: RFKILL_STATE_UNBLOCKED 119 1: RFKILL_STATE_UNBLOCKED
551 transmiter is NOT forced off, and may operate if 120 transmitter is (potentially) active
552 all other conditions for such operation are met
553 (such as interface is up and configured, etc);
554 2: RFKILL_STATE_HARD_BLOCKED 121 2: RFKILL_STATE_HARD_BLOCKED
555 transmitter is forced off by something outside of 122 transmitter is forced off by something outside of
556 the driver's control. One cannot set a device to 123 the driver's control.
557 this state through writes to the state attribute; 124 This file is deprecated because it can only properly show
558 claim: 1: Userspace handles events, 0: Kernel handles events 125 three of the four possible states, soft-and-hard-blocked is
559 126 missing.
560Both the "state" and "claim" entries are also writable. For the "state" entry 127 claim: 0: Kernel handles events
561this means that when 1 or 0 is written, the device rfkill state (if not yet in 128 This file is deprecated because there no longer is a way to
562the requested state), will be will be toggled accordingly. 129 claim just control over a single rfkill instance.
563 130
564For the "claim" entry writing 1 to it means that the kernel no longer handles 131rfkill devices also issue uevents (with an action of "change"), with the
565key events even though RFKILL_INPUT input was enabled. When "claim" has been 132following environment variables set:
566set to 0, userspace should make sure that it listens for the input events or 133
567check the sysfs "state" entry regularly to correctly perform the required tasks 134RFKILL_NAME
568when the rkfill key is pressed. 135RFKILL_STATE
569 136RFKILL_TYPE
570A note about input devices and EV_SW events: 137
571 138The contents of these variables corresponds to the "name", "state" and
572In order to know the current state of an input device switch (like 139"type" sysfs files explained above.
573SW_RFKILL_ALL), you will need to use an IOCTL. That information is not
574available through sysfs in a generic way at this time, and it is not available
575through the rfkill class AT ALL.
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
index 535f69fab45f..fd1cd8aae4eb 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/robust-futex-ABI.txt
@@ -135,7 +135,7 @@ manipulating this list), the user code must observe the following
135protocol on 'lock entry' insertion and removal: 135protocol on 'lock entry' insertion and removal:
136 136
137On insertion: 137On insertion:
138 1) set the 'list_op_pending' word to the address of the 'lock word' 138 1) set the 'list_op_pending' word to the address of the 'lock entry'
139 to be inserted, 139 to be inserted,
140 2) acquire the futex lock, 140 2) acquire the futex lock,
141 3) add the lock entry, with its thread id (TID) in the bottom 29 bits 141 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
@@ -143,7 +143,7 @@ On insertion:
143 4) clear the 'list_op_pending' word. 143 4) clear the 'list_op_pending' word.
144 144
145On removal: 145On removal:
146 1) set the 'list_op_pending' word to the address of the 'lock word' 146 1) set the 'list_op_pending' word to the address of the 'lock entry'
147 to be removed, 147 to be removed,
148 2) remove the lock entry for this lock from the 'head' list, 148 2) remove the lock entry for this lock from the 'head' list,
149 2) release the futex lock, and 149 2) release the futex lock, and
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 10711d9f0788..1eb576a023bd 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -1984,7 +1984,7 @@ break *$pc
1984 1984
1985break *0x400618 1985break *0x400618
1986 1986
1987heres a really useful one for large programs 1987Here's a really useful one for large programs
1988rbr 1988rbr
1989Set a breakpoint for all functions matching REGEXP 1989Set a breakpoint for all functions matching REGEXP
1990e.g. 1990e.g.
@@ -2211,7 +2211,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
2211#5 0x51692c in readline_internal () at readline.c:521 2211#5 0x51692c in readline_internal () at readline.c:521
2212#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") 2212#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ")
2213 at readline.c:349 2213 at readline.c:349
2214#7 0x4d7a8a in command_line_input (prrompt=0x564420 "(gdb) ", repeat=1, 2214#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
2215 annotation_suffix=0x4d6b44 "prompt") at top.c:2091 2215 annotation_suffix=0x4d6b44 "prompt") at top.c:2091
2216#8 0x4d6cf0 in command_loop () at top.c:1345 2216#8 0x4d6cf0 in command_loop () at top.c:1345
2217#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 2217#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX
index aabcc3a089ba..3c00c9c3219e 100644
--- a/Documentation/scheduler/00-INDEX
+++ b/Documentation/scheduler/00-INDEX
@@ -2,8 +2,6 @@
2 - this file. 2 - this file.
3sched-arch.txt 3sched-arch.txt
4 - CPU Scheduler implementation hints for architecture specific code. 4 - CPU Scheduler implementation hints for architecture specific code.
5sched-coding.txt
6 - reference for various scheduler-related methods in the O(1) scheduler.
7sched-design-CFS.txt 5sched-design-CFS.txt
8 - goals, design and implementation of the Complete Fair Scheduler. 6 - goals, design and implementation of the Complete Fair Scheduler.
9sched-domains.txt 7sched-domains.txt
diff --git a/Documentation/scheduler/sched-coding.txt b/Documentation/scheduler/sched-coding.txt
deleted file mode 100644
index cbd8db752acf..000000000000
--- a/Documentation/scheduler/sched-coding.txt
+++ /dev/null
@@ -1,126 +0,0 @@
1 Reference for various scheduler-related methods in the O(1) scheduler
2 Robert Love <rml@tech9.net>, MontaVista Software
3
4
5Note most of these methods are local to kernel/sched.c - this is by design.
6The scheduler is meant to be self-contained and abstracted away. This document
7is primarily for understanding the scheduler, not interfacing to it. Some of
8the discussed interfaces, however, are general process/scheduling methods.
9They are typically defined in include/linux/sched.h.
10
11
12Main Scheduling Methods
13-----------------------
14
15void load_balance(runqueue_t *this_rq, int idle)
16 Attempts to pull tasks from one cpu to another to balance cpu usage,
17 if needed. This method is called explicitly if the runqueues are
18 imbalanced or periodically by the timer tick. Prior to calling,
19 the current runqueue must be locked and interrupts disabled.
20
21void schedule()
22 The main scheduling function. Upon return, the highest priority
23 process will be active.
24
25
26Locking
27-------
28
29Each runqueue has its own lock, rq->lock. When multiple runqueues need
30to be locked, lock acquires must be ordered by ascending &runqueue value.
31
32A specific runqueue is locked via
33
34 task_rq_lock(task_t pid, unsigned long *flags)
35
36which disables preemption, disables interrupts, and locks the runqueue pid is
37running on. Likewise,
38
39 task_rq_unlock(task_t pid, unsigned long *flags)
40
41unlocks the runqueue pid is running on, restores interrupts to their previous
42state, and reenables preemption.
43
44The routines
45
46 double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
47
48and
49
50 double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
51
52safely lock and unlock, respectively, the two specified runqueues. They do
53not, however, disable and restore interrupts. Users are required to do so
54manually before and after calls.
55
56
57Values
58------
59
60MAX_PRIO
61 The maximum priority of the system, stored in the task as task->prio.
62 Lower priorities are higher. Normal (non-RT) priorities range from
63 MAX_RT_PRIO to (MAX_PRIO - 1).
64MAX_RT_PRIO
65 The maximum real-time priority of the system. Valid RT priorities
66 range from 0 to (MAX_RT_PRIO - 1).
67MAX_USER_RT_PRIO
68 The maximum real-time priority that is exported to user-space. Should
69 always be equal to or less than MAX_RT_PRIO. Setting it less allows
70 kernel threads to have higher priorities than any user-space task.
71MIN_TIMESLICE
72MAX_TIMESLICE
73 Respectively, the minimum and maximum timeslices (quanta) of a process.
74
75Data
76----
77
78struct runqueue
79 The main per-CPU runqueue data structure.
80struct task_struct
81 The main per-process data structure.
82
83
84General Methods
85---------------
86
87cpu_rq(cpu)
88 Returns the runqueue of the specified cpu.
89this_rq()
90 Returns the runqueue of the current cpu.
91task_rq(pid)
92 Returns the runqueue which holds the specified pid.
93cpu_curr(cpu)
94 Returns the task currently running on the given cpu.
95rt_task(pid)
96 Returns true if pid is real-time, false if not.
97
98
99Process Control Methods
100-----------------------
101
102void set_user_nice(task_t *p, long nice)
103 Sets the "nice" value of task p to the given value.
104int setscheduler(pid_t pid, int policy, struct sched_param *param)
105 Sets the scheduling policy and parameters for the given pid.
106int set_cpus_allowed(task_t *p, unsigned long new_mask)
107 Sets a given task's CPU affinity and migrates it to a proper cpu.
108 Callers must have a valid reference to the task and assure the
109 task not exit prematurely. No locks can be held during the call.
110set_task_state(tsk, state_value)
111 Sets the given task's state to the given value.
112set_current_state(state_value)
113 Sets the current task's state to the given value.
114void set_tsk_need_resched(struct task_struct *tsk)
115 Sets need_resched in the given task.
116void clear_tsk_need_resched(struct task_struct *tsk)
117 Clears need_resched in the given task.
118void set_need_resched()
119 Sets need_resched in the current task.
120void clear_need_resched()
121 Clears need_resched in the current task.
122int need_resched()
123 Returns true if need_resched is set in the current task, false
124 otherwise.
125yield()
126 Place the current process at the end of the runqueue and call schedule.
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt
index e2bae5a577e3..3ac1e46d5365 100644
--- a/Documentation/scheduler/sched-nice-design.txt
+++ b/Documentation/scheduler/sched-nice-design.txt
@@ -55,7 +55,7 @@ To sum it up: we always wanted to make nice levels more consistent, but
55within the constraints of HZ and jiffies and their nasty design level 55within the constraints of HZ and jiffies and their nasty design level
56coupling to timeslices and granularity it was not really viable. 56coupling to timeslices and granularity it was not really viable.
57 57
58The second (less frequent but still periodically occuring) complaint 58The second (less frequent but still periodically occurring) complaint
59about Linux's nice level support was its assymetry around the origo 59about Linux's nice level support was its assymetry around the origo
60(which you can see demonstrated in the picture above), or more 60(which you can see demonstrated in the picture above), or more
61accurately: the fact that nice level behavior depended on the _absolute_ 61accurately: the fact that nice level behavior depended on the _absolute_
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 3ef339f491e0..1df7f9cdab05 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -4,6 +4,7 @@
4CONTENTS 4CONTENTS
5======== 5========
6 6
70. WARNING
71. Overview 81. Overview
8 1.1 The problem 9 1.1 The problem
9 1.2 The solution 10 1.2 The solution
@@ -14,6 +15,23 @@ CONTENTS
143. Future plans 153. Future plans
15 16
16 17
180. WARNING
19==========
20
21 Fiddling with these settings can result in an unstable system, the knobs are
22 root only and assumes root knows what he is doing.
23
24Most notable:
25
26 * very small values in sched_rt_period_us can result in an unstable
27 system when the period is smaller than either the available hrtimer
28 resolution, or the time it takes to handle the budget refresh itself.
29
30 * very small values in sched_rt_runtime_us can result in an unstable
31 system when the runtime is so small the system has difficulty making
32 forward progress (NOTE: the migration thread and kstopmachine both
33 are real-time processes).
34
171. Overview 351. Overview
18=========== 36===========
19 37
@@ -126,7 +144,7 @@ This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_u
126to control the CPU time reserved for each control group instead. 144to control the CPU time reserved for each control group instead.
127 145
128For more information on working with control groups, you should read 146For more information on working with control groups, you should read
129Documentation/cgroups.txt as well. 147Documentation/cgroups/cgroups.txt as well.
130 148
131Group settings are checked against the following limits in order to keep the configuration 149Group settings are checked against the following limits in order to keep the configuration
132schedulable: 150schedulable:
@@ -169,7 +187,7 @@ get their allocated time.
169 187
170Implementing SCHED_EDF might take a while to complete. Priority Inheritance is 188Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
171the biggest challenge as the current linux PI infrastructure is geared towards 189the biggest challenge as the current linux PI infrastructure is geared towards
172the limited static priority levels 0-139. With deadline scheduling you need to 190the limited static priority levels 0-99. With deadline scheduling you need to
173do deadline inheritance (since priority is inversely proportional to the 191do deadline inheritance (since priority is inversely proportional to the
174deadline delta (deadline - now). 192deadline delta (deadline - now).
175 193
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index ddace3afc83b..30f643f611b2 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -60,17 +60,9 @@ Supported Cards/Chipsets
60 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite) 60 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite)
61 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite) 61 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite)
62 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite) 62 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite)
63 9005:0285:9005:02d8 Adaptec 5405G (Voodoo40 PM) 63 9005:0285:9005:02d8 Adaptec 5405Z (Voodoo40 BLBU)
64 9005:0285:9005:02d9 Adaptec 5445G (Voodoo44 PM) 64 9005:0285:9005:02d9 Adaptec 5445Z (Voodoo44 BLBU)
65 9005:0285:9005:02da Adaptec 5805G (Voodoo80 PM) 65 9005:0285:9005:02da Adaptec 5805Z (Voodoo80 BLBU)
66 9005:0285:9005:02db Adaptec 5085G (Voodoo08 PM)
67 9005:0285:9005:02dc Adaptec 51245G (Voodoo124 PM)
68 9005:0285:9005:02dd Adaptec 51645G (Voodoo164 PM)
69 9005:0285:9005:02de Adaptec 52445G (Voodoo244 PM)
70 9005:0285:9005:02df Adaptec ASR-2045G (Voodoo04 Lite PM)
71 9005:0285:9005:02e0 Adaptec ASR-2405G (Voodoo40 Lite PM)
72 9005:0285:9005:02e1 Adaptec ASR-2445G (Voodoo44 Lite PM)
73 9005:0285:9005:02e2 Adaptec ASR-2805G (Voodoo80 Lite PM)
74 1011:0046:9005:0364 Adaptec 5400S (Mustang) 66 1011:0046:9005:0364 Adaptec 5400S (Mustang)
75 1011:0046:9005:0365 Adaptec 5400S (Mustang) 67 1011:0046:9005:0365 Adaptec 5400S (Mustang)
76 9005:0287:9005:0800 Adaptec Themisto (Jupiter) 68 9005:0287:9005:0800 Adaptec Themisto (Jupiter)
@@ -140,6 +132,7 @@ Deanna Bonds (non-DASD support, PAE fibs and 64 bit,
140 where fibs that go to the hardware are consistently called hw_fibs and 132 where fibs that go to the hardware are consistently called hw_fibs and
141 not just fibs like the name of the driver tracking structure) 133 not just fibs like the name of the driver tracking structure)
142Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations. 134Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations.
135Achim Leubner <Achim_Leubner@adaptec.com>
143 136
144Original Driver 137Original Driver
145------------------------- 138-------------------------
diff --git a/Documentation/scsi/aic79xx.txt b/Documentation/scsi/aic79xx.txt
index 683ccae00ad4..c014eccaf19f 100644
--- a/Documentation/scsi/aic79xx.txt
+++ b/Documentation/scsi/aic79xx.txt
@@ -194,7 +194,7 @@ The following information is available in this file:
194 - Packetized SCSI Protocol at 160MB/s and 320MB/s 194 - Packetized SCSI Protocol at 160MB/s and 320MB/s
195 - Quick Arbitration Selection (QAS) 195 - Quick Arbitration Selection (QAS)
196 - Retained Training Information (Rev B. ASIC only) 196 - Retained Training Information (Rev B. ASIC only)
197 - Interrupt Coalessing 197 - Interrupt Coalescing
198 - Initiator Mode (target mode not currently 198 - Initiator Mode (target mode not currently
199 supported) 199 supported)
200 - Support for the PCI-X standard up to 133MHz 200 - Support for the PCI-X standard up to 133MHz
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index 230e30846ef2..08e2b4d04aab 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -206,7 +206,7 @@ of MOVE MEMORY instructions.
206The 896 and the 895A allows handling of the phase mismatch context from 206The 896 and the 895A allows handling of the phase mismatch context from
207SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor 207SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor
208until the C code has saved the context of the transfer). 208until the C code has saved the context of the transfer).
209Implementing this without using LOAD/STORE instructions would be painfull 209Implementing this without using LOAD/STORE instructions would be painful
210and I didn't even want to try it. 210and I didn't even want to try it.
211 211
212The 896 chip supports 64 bit PCI transactions and addressing, while the 212The 896 chip supports 64 bit PCI transactions and addressing, while the
@@ -240,7 +240,7 @@ characteristics. This feature may also reduce average command latency.
240In order to really gain advantage of this feature, devices must have 240In order to really gain advantage of this feature, devices must have
241a reasonable cache size (No miracle is to be expected for a low-end 241a reasonable cache size (No miracle is to be expected for a low-end
242hard disk with 128 KB or less). 242hard disk with 128 KB or less).
243Some kown SCSI devices do not properly support tagged command queuing. 243Some known SCSI devices do not properly support tagged command queuing.
244Generally, firmware revisions that fix this kind of problems are available 244Generally, firmware revisions that fix this kind of problems are available
245at respective vendor web/ftp sites. 245at respective vendor web/ftp sites.
246All I can say is that the hard disks I use on my machines behave well with 246All I can say is that the hard disks I use on my machines behave well with
diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt
new file mode 100644
index 000000000000..da162f7fd5f5
--- /dev/null
+++ b/Documentation/scsi/osd.txt
@@ -0,0 +1,198 @@
1The OSD Standard
2================
3OSD (Object-Based Storage Device) is a T10 SCSI command set that is designed
4to provide efficient operation of input/output logical units that manage the
5allocation, placement, and accessing of variable-size data-storage containers,
6called objects. Objects are intended to contain operating system and application
7constructs. Each object has associated attributes attached to it, which are
8integral part of the object and provide metadata about the object. The standard
9defines some common obligatory attributes, but user attributes can be added as
10needed.
11
12See: http://www.t10.org/ftp/t10/drafts/osd2/ for the latest draft for OSD 2
13or search the web for "OSD SCSI"
14
15OSD in the Linux Kernel
16=======================
17osd-initiator:
18 The main component of OSD in Kernel is the osd-initiator library. Its main
19user is intended to be the pNFS-over-objects layout driver, which uses objects
20as its back-end data storage. Other clients are the other osd parts listed below.
21
22osd-uld:
23 This is a SCSI ULD that registers for OSD type devices and provides a testing
24platform, both for the in-kernel initiator as well as connected targets. It
25currently has no useful user-mode API, though it could have if need be.
26
27exofs:
28 Is an OSD based Linux file system. It uses the osd-initiator and osd-uld,
29to export a usable file system for users.
30See Documentation/filesystems/exofs.txt for more details
31
32osd target:
33 There are no current plans for an OSD target implementation in kernel. For all
34needs, a user-mode target that is based on the scsi tgt target framework is
35available from Ohio Supercomputer Center (OSC) at:
36http://www.open-osd.org/bin/view/Main/OscOsdProject
37There are several other target implementations. See http://open-osd.org for more
38links.
39
40Files and Folders
41=================
42This is the complete list of files included in this work:
43include/scsi/
44 osd_initiator.h Main API for the initiator library
45 osd_types.h Common OSD types
46 osd_sec.h Security Manager API
47 osd_protocol.h Wire definitions of the OSD standard protocol
48 osd_attributes.h Wire definitions of OSD attributes
49
50drivers/scsi/osd/
51 osd_initiator.c OSD-Initiator library implementation
52 osd_uld.c The OSD scsi ULD
53 osd_ktest.{h,c} In-kernel test suite (called by osd_uld)
54 osd_debug.h Some printk macros
55 Makefile For both in-tree and out-of-tree compilation
56 Kconfig Enables inclusion of the different pieces
57 osd_test.c User-mode application to call the kernel tests
58
59The OSD-Initiator Library
60=========================
61osd_initiator is a low level implementation of an osd initiator encoder.
62But even though, it should be intuitive and easy to use. Perhaps over time an
63higher lever will form that automates some of the more common recipes.
64
65init/fini:
66- osd_dev_init() associates a scsi_device with an osd_dev structure
67 and initializes some global pools. This should be done once per scsi_device
68 (OSD LUN). The osd_dev structure is needed for calling osd_start_request().
69
70- osd_dev_fini() cleans up before a osd_dev/scsi_device destruction.
71
72OSD commands encoding, execution, and decoding of results:
73
74struct osd_request's is used to iteratively encode an OSD command and carry
75its state throughout execution. Each request goes through these stages:
76
77a. osd_start_request() allocates the request.
78
79b. Any of the osd_req_* methods is used to encode a request of the specified
80 type.
81
82c. osd_req_add_{get,set}_attr_* may be called to add get/set attributes to the
83 CDB. "List" or "Page" mode can be used exclusively. The attribute-list API
84 can be called multiple times on the same request. However, only one
85 attribute-page can be read, as mandated by the OSD standard.
86
87d. osd_finalize_request() computes offsets into the data-in and data-out buffers
88 and signs the request using the provided capability key and integrity-
89 check parameters.
90
91e. osd_execute_request() may be called to execute the request via the block
92 layer and wait for its completion. The request can be executed
93 asynchronously by calling the block layer API directly.
94
95f. After execution, osd_req_decode_sense() can be called to decode the request's
96 sense information.
97
98g. osd_req_decode_get_attr() may be called to retrieve osd_add_get_attr_list()
99 values.
100
101h. osd_end_request() must be called to deallocate the request and any resource
102 associated with it. Note that osd_end_request cleans up the request at any
103 stage and it must always be called after a successful osd_start_request().
104
105osd_request's structure:
106
107The OSD standard defines a complex structure of IO segments pointed to by
108members in the CDB. Up to 3 segments can be deployed in the IN-Buffer and up to
1094 in the OUT-Buffer. The ASCII illustration below depicts a secure-read with
110associated get+set of attributes-lists. Other combinations very on the same
111basic theme. From no-segments-used up to all-segments-used.
112
113|________OSD-CDB__________|
114| |
115|read_len (offset=0) -|---------\
116| | |
117|get_attrs_list_length | |
118|get_attrs_list_offset -|----\ |
119| | | |
120|retrieved_attrs_alloc_len| | |
121|retrieved_attrs_offset -|----|----|-\
122| | | | |
123|set_attrs_list_length | | | |
124|set_attrs_list_offset -|-\ | | |
125| | | | | |
126|in_data_integ_offset -|-|--|----|-|-\
127|out_data_integ_offset -|-|--|--\ | | |
128\_________________________/ | | | | | |
129 | | | | | |
130|_______OUT-BUFFER________| | | | | | |
131| Set attr list |</ | | | | |
132| | | | | | |
133|-------------------------| | | | | |
134| Get attr descriptors |<---/ | | | |
135| | | | | |
136|-------------------------| | | | |
137| Out-data integrity |<------/ | | |
138| | | | |
139\_________________________/ | | |
140 | | |
141|________IN-BUFFER________| | | |
142| In-Data read |<--------/ | |
143| | | |
144|-------------------------| | |
145| Get attr list |<----------/ |
146| | |
147|-------------------------| |
148| In-data integrity |<------------/
149| |
150\_________________________/
151
152A block device request can carry bidirectional payload by means of associating
153a bidi_read request with a main write-request. Each in/out request is described
154by a chain of BIOs associated with each request.
155The CDB is of a SCSI VARLEN CDB format, as described by OSD standard.
156The OSD standard also mandates alignment restrictions at start of each segment.
157
158In the code, in struct osd_request, there are two _osd_io_info structures to
159describe the IN/OUT buffers above, two BIOs for the data payload and up to five
160_osd_req_data_segment structures to hold the different segments allocation and
161information.
162
163Important: We have chosen to disregard the assumption that a BIO-chain (and
164the resulting sg-list) describes a linear memory buffer. Meaning only first and
165last scatter chain can be incomplete and all the middle chains are of PAGE_SIZE.
166For us, a scatter-gather-list, as its name implies and as used by the Networking
167layer, is to describe a vector of buffers that will be transferred to/from the
168wire. It works very well with current iSCSI transport. iSCSI is currently the
169only deployed OSD transport. In the future we anticipate SAS and FC attached OSD
170devices as well.
171
172The OSD Testing ULD
173===================
174TODO: More user-mode control on tests.
175
176Authors, Mailing list
177=====================
178Please communicate with us on any deployment of osd, whether using this code
179or not.
180
181Any problems, questions, bug reports, lonely OSD nights, please email:
182 OSD Dev List <osd-dev@open-osd.org>
183
184More up-to-date information can be found on:
185http://open-osd.org
186
187Boaz Harrosh <bharrosh@panasas.com>
188Benny Halevy <bhalevy@panasas.com>
189
190References
191==========
192Weber, R., "SCSI Object-Based Storage Device Commands",
193T10/1355-D ANSI/INCITS 400-2004,
194http://www.t10.org/ftp/t10/drafts/osd/osd-r10.pdf
195
196Weber, R., "SCSI Object-Based Storage Device Commands -2 (OSD-2)"
197T10/1729-D, Working Draft, rev. 3
198http://www.t10.org/ftp/t10/drafts/osd2/osd2r03.pdf
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index e5b071d46619..d7f181701dc2 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -1,10 +1,11 @@
1 SCSI FC Tansport 1 SCSI FC Tansport
2 ============================================= 2 =============================================
3 3
4Date: 4/12/2007 4Date: 11/18/2008
5Kernel Revisions for features: 5Kernel Revisions for features:
6 rports : <<TBS>> 6 rports : <<TBS>>
7 vports : 2.6.22 (? TBD) 7 vports : 2.6.22
8 bsg support : 2.6.30 (?TBD?)
8 9
9 10
10Introduction 11Introduction
@@ -15,6 +16,7 @@ The FC transport can be found at:
15 drivers/scsi/scsi_transport_fc.c 16 drivers/scsi/scsi_transport_fc.c
16 include/scsi/scsi_transport_fc.h 17 include/scsi/scsi_transport_fc.h
17 include/scsi/scsi_netlink_fc.h 18 include/scsi/scsi_netlink_fc.h
19 include/scsi/scsi_bsg_fc.h
18 20
19This file is found at Documentation/scsi/scsi_fc_transport.txt 21This file is found at Documentation/scsi/scsi_fc_transport.txt
20 22
@@ -472,6 +474,14 @@ int
472fc_vport_terminate(struct fc_vport *vport) 474fc_vport_terminate(struct fc_vport *vport)
473 475
474 476
477FC BSG support (CT & ELS passthru, and more)
478========================================================================
479<< To Be Supplied >>
480
481
482
483
484
475Credits 485Credits
476======= 486=======
477The following people have contributed to this document: 487The following people have contributed to this document:
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index a6d5354639b2..de67229251d8 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1271,6 +1271,11 @@ of interest:
1271 hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size 1271 hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size
1272 is set by the second argument (named 'xtr_bytes') to 1272 is set by the second argument (named 'xtr_bytes') to
1273 scsi_host_alloc() or scsi_register(). 1273 scsi_host_alloc() or scsi_register().
1274 vendor_id - a unique value that identifies the vendor supplying
1275 the LLD for the Scsi_Host. Used most often in validating
1276 vendor-specific message requests. Value consists of an
1277 identifier type and a vendor-specific value.
1278 See scsi_netlink.h for a description of valid formats.
1274 1279
1275The scsi_host structure is defined in include/scsi/scsi_host.h 1280The scsi_host structure is defined in include/scsi/scsi_host.h
1276 1281
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt
index 49ea5c58c6bc..eb9a7b905b64 100644
--- a/Documentation/scsi/sym53c8xx_2.txt
+++ b/Documentation/scsi/sym53c8xx_2.txt
@@ -206,7 +206,7 @@ characteristics. This feature may also reduce average command latency.
206In order to really gain advantage of this feature, devices must have 206In order to really gain advantage of this feature, devices must have
207a reasonable cache size (No miracle is to be expected for a low-end 207a reasonable cache size (No miracle is to be expected for a low-end
208hard disk with 128 KB or less). 208hard disk with 128 KB or less).
209Some kown old SCSI devices do not properly support tagged command queuing. 209Some known old SCSI devices do not properly support tagged command queuing.
210Generally, firmware revisions that fix this kind of problems are available 210Generally, firmware revisions that fix this kind of problems are available
211at respective vendor web/ftp sites. 211at respective vendor web/ftp sites.
212All I can say is that I never have had problem with tagged queuing using 212All I can say is that I never have had problem with tagged queuing using
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
new file mode 100644
index 000000000000..ebc50f808ea4
--- /dev/null
+++ b/Documentation/slow-work.txt
@@ -0,0 +1,174 @@
1 ====================================
2 SLOW WORK ITEM EXECUTION THREAD POOL
3 ====================================
4
5By: David Howells <dhowells@redhat.com>
6
7The slow work item execution thread pool is a pool of threads for performing
8things that take a relatively long time, such as making mkdir calls.
9Typically, when processing something, these items will spend a lot of time
10blocking a thread on I/O, thus making that thread unavailable for doing other
11work.
12
13The standard workqueue model is unsuitable for this class of work item as that
14limits the owner to a single thread or a single thread per CPU. For some
15tasks, however, more threads - or fewer - are required.
16
17There is just one pool per system. It contains no threads unless something
18wants to use it - and that something must register its interest first. When
19the pool is active, the number of threads it contains is dynamic, varying
20between a maximum and minimum setting, depending on the load.
21
22
23====================
24CLASSES OF WORK ITEM
25====================
26
27This pool support two classes of work items:
28
29 (*) Slow work items.
30
31 (*) Very slow work items.
32
33The former are expected to finish much quicker than the latter.
34
35An operation of the very slow class may do a batch combination of several
36lookups, mkdirs, and a create for instance.
37
38An operation of the ordinarily slow class may, for example, write stuff or
39expand files, provided the time taken to do so isn't too long.
40
41Operations of both types may sleep during execution, thus tying up the thread
42loaned to it.
43
44
45THREAD-TO-CLASS ALLOCATION
46--------------------------
47
48Not all the threads in the pool are available to work on very slow work items.
49The number will be between one and one fewer than the number of active threads.
50This is configurable (see the "Pool Configuration" section).
51
52All the threads are available to work on ordinarily slow work items, but a
53percentage of the threads will prefer to work on very slow work items.
54
55The configuration ensures that at least one thread will be available to work on
56very slow work items, and at least one thread will be available that won't work
57on very slow work items at all.
58
59
60=====================
61USING SLOW WORK ITEMS
62=====================
63
64Firstly, a module or subsystem wanting to make use of slow work items must
65register its interest:
66
67 int ret = slow_work_register_user();
68
69This will return 0 if successful, or a -ve error upon failure.
70
71
72Slow work items may then be set up by:
73
74 (1) Declaring a slow_work struct type variable:
75
76 #include <linux/slow-work.h>
77
78 struct slow_work myitem;
79
80 (2) Declaring the operations to be used for this item:
81
82 struct slow_work_ops myitem_ops = {
83 .get_ref = myitem_get_ref,
84 .put_ref = myitem_put_ref,
85 .execute = myitem_execute,
86 };
87
88 [*] For a description of the ops, see section "Item Operations".
89
90 (3) Initialising the item:
91
92 slow_work_init(&myitem, &myitem_ops);
93
94 or:
95
96 vslow_work_init(&myitem, &myitem_ops);
97
98 depending on its class.
99
100A suitably set up work item can then be enqueued for processing:
101
102 int ret = slow_work_enqueue(&myitem);
103
104This will return a -ve error if the thread pool is unable to gain a reference
105on the item, 0 otherwise.
106
107
108The items are reference counted, so there ought to be no need for a flush
109operation. When all a module's slow work items have been processed, and the
110module has no further interest in the facility, it should unregister its
111interest:
112
113 slow_work_unregister_user();
114
115
116===============
117ITEM OPERATIONS
118===============
119
120Each work item requires a table of operations of type struct slow_work_ops.
121All members are required:
122
123 (*) Get a reference on an item:
124
125 int (*get_ref)(struct slow_work *work);
126
127 This allows the thread pool to attempt to pin an item by getting a
128 reference on it. This function should return 0 if the reference was
129 granted, or a -ve error otherwise. If an error is returned,
130 slow_work_enqueue() will fail.
131
132 The reference is held whilst the item is queued and whilst it is being
133 executed. The item may then be requeued with the same reference held, or
134 the reference will be released.
135
136 (*) Release a reference on an item:
137
138 void (*put_ref)(struct slow_work *work);
139
140 This allows the thread pool to unpin an item by releasing the reference on
141 it. The thread pool will not touch the item again once this has been
142 called.
143
144 (*) Execute an item:
145
146 void (*execute)(struct slow_work *work);
147
148 This should perform the work required of the item. It may sleep, it may
149 perform disk I/O and it may wait for locks.
150
151
152==================
153POOL CONFIGURATION
154==================
155
156The slow-work thread pool has a number of configurables:
157
158 (*) /proc/sys/kernel/slow-work/min-threads
159
160 The minimum number of threads that should be in the pool whilst it is in
161 use. This may be anywhere between 2 and max-threads.
162
163 (*) /proc/sys/kernel/slow-work/max-threads
164
165 The maximum number of threads that should in the pool. This may be
166 anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater.
167
168 (*) /proc/sys/kernel/slow-work/vslow-percentage
169
170 The percentage of active threads in the pool that may be used to execute
171 very slow work items. This may be between 1 and 99. The resultant number
172 is bounded to between 1 and one fewer than the number of active threads.
173 This ensures there is always at least one thread that can process very
174 slow work items, and always at least one thread that won't.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 841a9365d5fd..4252697a95d6 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -346,6 +346,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
346 sbirq - IRQ # for CMI8330 chip (SB16) 346 sbirq - IRQ # for CMI8330 chip (SB16)
347 sbdma8 - 8bit DMA # for CMI8330 chip (SB16) 347 sbdma8 - 8bit DMA # for CMI8330 chip (SB16)
348 sbdma16 - 16bit DMA # for CMI8330 chip (SB16) 348 sbdma16 - 16bit DMA # for CMI8330 chip (SB16)
349 fmport - (optional) OPL3 I/O port
350 mpuport - (optional) MPU401 I/O port
351 mpuirq - (optional) MPU401 irq #
349 352
350 This module supports multiple cards and autoprobe. 353 This module supports multiple cards and autoprobe.
351 354
@@ -388,34 +391,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
388 391
389 The power-management is supported. 392 The power-management is supported.
390 393
391 Module snd-cs4232
392 -----------------
393
394 Module for sound cards based on CS4232/CS4232A ISA chips.
395
396 isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
397
398 with isapnp=0, the following options are available:
399
400 port - port # for CS4232 chip (PnP setup - 0x534)
401 cport - control port # for CS4232 chip (PnP setup - 0x120,0x210,0xf00)
402 mpu_port - port # for MPU-401 UART (PnP setup - 0x300), -1 = disable
403 fm_port - FM port # for CS4232 chip (PnP setup - 0x388), -1 = disable
404 irq - IRQ # for CS4232 chip (5,7,9,11,12,15)
405 mpu_irq - IRQ # for MPU-401 UART (9,11,12,15)
406 dma1 - first DMA # for CS4232 chip (0,1,3)
407 dma2 - second DMA # for Yamaha CS4232 chip (0,1,3), -1 = disable
408
409 This module supports multiple cards. This module does not support autoprobe
410 (if ISA PnP is not used) thus main port must be specified!!! Other ports are
411 optional.
412
413 The power-management is supported.
414
415 Module snd-cs4236 394 Module snd-cs4236
416 ----------------- 395 -----------------
417 396
418 Module for sound cards based on CS4235/CS4236/CS4236B/CS4237B/ 397 Module for sound cards based on CS4232/CS4232A,
398 CS4235/CS4236/CS4236B/CS4237B/
419 CS4238B/CS4239 ISA chips. 399 CS4238B/CS4239 ISA chips.
420 400
421 isapnp - ISA PnP detection - 0 = disable, 1 = enable (default) 401 isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
@@ -437,6 +417,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
437 417
438 The power-management is supported. 418 The power-management is supported.
439 419
420 This module is aliased as snd-cs4232 since it provides the old
421 snd-cs4232 functionality, too.
422
440 Module snd-cs4281 423 Module snd-cs4281
441 ----------------- 424 -----------------
442 425
@@ -477,6 +460,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
477 460
478 The power-management is supported. 461 The power-management is supported.
479 462
463 Module snd-ctxfi
464 ----------------
465
466 Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips)
467 * Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series
468 * Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
469 * Creative Sound Blaster X-Fi Titanium Professional Audio
470 * Creative Sound Blaster X-Fi Titanium
471 * Creative Sound Blaster X-Fi Elite Pro
472 * Creative Sound Blaster X-Fi Platinum
473 * Creative Sound Blaster X-Fi Fatal1ty
474 * Creative Sound Blaster X-Fi XtremeGamer
475 * Creative Sound Blaster X-Fi XtremeMusic
476
477 reference_rate - reference sample rate, 44100 or 48000 (default)
478 multiple - multiple to ref. sample rate, 1 or 2 (default)
479
480 This module supports multiple cards.
481
480 Module snd-darla20 482 Module snd-darla20
481 ------------------ 483 ------------------
482 484
@@ -606,6 +608,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
606 Module for ESS AudioDrive ES-1688 and ES-688 sound cards. 608 Module for ESS AudioDrive ES-1688 and ES-688 sound cards.
607 609
608 port - port # for ES-1688 chip (0x220,0x240,0x260) 610 port - port # for ES-1688 chip (0x220,0x240,0x260)
611 fm_port - port # for OPL3 (option; share the same port as default)
609 mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default) 612 mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default)
610 irq - IRQ # for ES-1688 chip (5,7,9,10) 613 irq - IRQ # for ES-1688 chip (5,7,9,10)
611 mpu_irq - IRQ # for MPU-401 port (5,7,9,10) 614 mpu_irq - IRQ # for MPU-401 port (5,7,9,10)
@@ -757,6 +760,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
757 model - force the model name 760 model - force the model name
758 position_fix - Fix DMA pointer (0 = auto, 1 = use LPIB, 2 = POSBUF) 761 position_fix - Fix DMA pointer (0 = auto, 1 = use LPIB, 2 = POSBUF)
759 probe_mask - Bitmask to probe codecs (default = -1, meaning all slots) 762 probe_mask - Bitmask to probe codecs (default = -1, meaning all slots)
763 When the bit 8 (0x100) is set, the lower 8 bits are used
764 as the "fixed" codec slots; i.e. the driver probes the
765 slots regardless what hardware reports back
760 probe_only - Only probing and no codec initialization (default=off); 766 probe_only - Only probing and no codec initialization (default=off);
761 Useful to check the initial codec status for debugging 767 Useful to check the initial codec status for debugging
762 bdl_pos_adj - Specifies the DMA IRQ timing delay in samples. 768 bdl_pos_adj - Specifies the DMA IRQ timing delay in samples.
@@ -767,7 +773,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
767 single_cmd - Use single immediate commands to communicate with 773 single_cmd - Use single immediate commands to communicate with
768 codecs (for debugging only) 774 codecs (for debugging only)
769 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) 775 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
770 power_save - Automatic power-saving timtout (in second, 0 = 776 power_save - Automatic power-saving timeout (in second, 0 =
771 disable) 777 disable)
772 power_save_controller - Reset HD-audio controller in power-saving mode 778 power_save_controller - Reset HD-audio controller in power-saving mode
773 (default = on) 779 (default = on)
@@ -938,6 +944,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
938 * Onkyo SE-90PCI 944 * Onkyo SE-90PCI
939 * Onkyo SE-200PCI 945 * Onkyo SE-200PCI
940 * ESI Juli@ 946 * ESI Juli@
947 * ESI Maya44
941 * Hercules Fortissimo IV 948 * Hercules Fortissimo IV
942 * EGO-SYS WaveTerminal 192M 949 * EGO-SYS WaveTerminal 192M
943 950
@@ -946,7 +953,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
946 prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192, 953 prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192,
947 juli, aureon51, aureon71, universe, ap192, k8x800, 954 juli, aureon51, aureon71, universe, ap192, k8x800,
948 phase22, phase28, ms300, av710, se200pci, se90pci, 955 phase22, phase28, ms300, av710, se200pci, se90pci,
949 fortissimo4, sn25p, WT192M 956 fortissimo4, sn25p, WT192M, maya44
950 957
951 This module supports multiple cards and autoprobe. 958 This module supports multiple cards and autoprobe.
952 959
@@ -1106,6 +1113,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1106 This module supports multiple cards. 1113 This module supports multiple cards.
1107 The driver requires the firmware loader support on kernel. 1114 The driver requires the firmware loader support on kernel.
1108 1115
1116 Module snd-lx6464es
1117 -------------------
1118
1119 Module for Digigram LX6464ES boards
1120
1121 This module supports multiple cards.
1122
1109 Module snd-maestro3 1123 Module snd-maestro3
1110 ------------------- 1124 -------------------
1111 1125
@@ -1185,6 +1199,54 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1185 1199
1186 This module supports multiple devices and PnP. 1200 This module supports multiple devices and PnP.
1187 1201
1202 Module snd-msnd-classic
1203 -----------------------
1204
1205 Module for Turtle Beach MultiSound Classic, Tahiti or Monterey
1206 soundcards.
1207
1208 io - Port # for msnd-classic card
1209 irq - IRQ # for msnd-classic card
1210 mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000,
1211 0xe0000 or 0xe8000)
1212 write_ndelay - enable write ndelay (default = 1)
1213 calibrate_signal - calibrate signal (default = 0)
1214 isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
1215 digital - Digital daughterboard present (default = 0)
1216 cfg - Config port (0x250, 0x260 or 0x270) default = PnP
1217 reset - Reset all devices
1218 mpu_io - MPU401 I/O port
1219 mpu_irq - MPU401 irq#
1220 ide_io0 - IDE port #0
1221 ide_io1 - IDE port #1
1222 ide_irq - IDE irq#
1223 joystick_io - Joystick I/O port
1224
1225 The driver requires firmware files "turtlebeach/msndinit.bin" and
1226 "turtlebeach/msndperm.bin" in the proper firmware directory.
1227
1228 See Documentation/sound/oss/MultiSound for important information
1229 about this driver. Note that it has been discontinued, but the
1230 Voyetra Turtle Beach knowledge base entry for it is still available
1231 at
1232 http://www.turtlebeach.com/site/kb_ftp/790.asp
1233
1234 Module snd-msnd-pinnacle
1235 ------------------------
1236
1237 Module for Turtle Beach MultiSound Pinnacle/Fiji soundcards.
1238
1239 io - Port # for pinnacle/fiji card
1240 irq - IRQ # for pinnalce/fiji card
1241 mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000,
1242 0xe0000 or 0xe8000)
1243 write_ndelay - enable write ndelay (default = 1)
1244 calibrate_signal - calibrate signal (default = 0)
1245 isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
1246
1247 The driver requires firmware files "turtlebeach/pndspini.bin" and
1248 "turtlebeach/pndsperm.bin" in the proper firmware directory.
1249
1188 Module snd-mtpav 1250 Module snd-mtpav
1189 ---------------- 1251 ----------------
1190 1252
@@ -1508,13 +1570,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1508 Module snd-sc6000 1570 Module snd-sc6000
1509 ----------------- 1571 -----------------
1510 1572
1511 Module for Gallant SC-6000 soundcard. 1573 Module for Gallant SC-6000 soundcard and later models: SC-6600
1574 and SC-7000.
1512 1575
1513 port - Port # (0x220 or 0x240) 1576 port - Port # (0x220 or 0x240)
1514 mss_port - MSS Port # (0x530 or 0xe80) 1577 mss_port - MSS Port # (0x530 or 0xe80)
1515 irq - IRQ # (5,7,9,10,11) 1578 irq - IRQ # (5,7,9,10,11)
1516 mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq 1579 mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq
1517 dma - DMA # (1,3,0) 1580 dma - DMA # (1,3,0)
1581 joystick - Enable gameport - 0 = disable (default), 1 = enable
1518 1582
1519 This module supports multiple cards. 1583 This module supports multiple cards.
1520 1584
@@ -1824,7 +1888,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1824 ------------------- 1888 -------------------
1825 1889
1826 Module for sound cards based on the Asus AV100/AV200 chips, 1890 Module for sound cards based on the Asus AV100/AV200 chips,
1827 i.e., Xonar D1, DX, D2, D2X and HDAV1.3 (Deluxe). 1891 i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), Essence ST
1892 (Deluxe) and Essence STX.
1828 1893
1829 This module supports autoprobe and multiple cards. 1894 This module supports autoprobe and multiple cards.
1830 1895
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 0f5d26bea80f..0d8d23581c44 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -36,6 +36,7 @@ ALC260
36 acer Acer TravelMate 36 acer Acer TravelMate
37 will Will laptops (PB V7900) 37 will Will laptops (PB V7900)
38 replacer Replacer 672V 38 replacer Replacer 672V
39 favorit100 Maxdata Favorit 100XS
39 basic fixed pin assignment (old default model) 40 basic fixed pin assignment (old default model)
40 test for testing/debugging purpose, almost all controls can 41 test for testing/debugging purpose, almost all controls can
41 adjusted. Appearing only when compiled with 42 adjusted. Appearing only when compiled with
@@ -56,6 +57,7 @@ ALC262
56 sony-assamd Sony ASSAMD 57 sony-assamd Sony ASSAMD
57 toshiba-s06 Toshiba S06 58 toshiba-s06 Toshiba S06
58 toshiba-rx1 Toshiba RX1 59 toshiba-rx1 Toshiba RX1
60 tyan Tyan Thunder n6650W (S2915-E)
59 ultra Samsung Q1 Ultra Vista model 61 ultra Samsung Q1 Ultra Vista model
60 lenovo-3000 Lenovo 3000 y410 62 lenovo-3000 Lenovo 3000 y410
61 nec NEC Versa S9100 63 nec NEC Versa S9100
@@ -84,10 +86,11 @@ ALC269
84 eeepc-p703 ASUS Eeepc P703 P900A 86 eeepc-p703 ASUS Eeepc P703 P900A
85 eeepc-p901 ASUS Eeepc P901 S101 87 eeepc-p901 ASUS Eeepc P901 S101
86 fujitsu FSC Amilo 88 fujitsu FSC Amilo
89 lifebook Fujitsu Lifebook S6420
87 auto auto-config reading BIOS (default) 90 auto auto-config reading BIOS (default)
88 91
89ALC662/663 92ALC662/663/272
90========== 93==============
91 3stack-dig 3-stack (2-channel) with SPDIF 94 3stack-dig 3-stack (2-channel) with SPDIF
92 3stack-6ch 3-stack (6-channel) 95 3stack-6ch 3-stack (6-channel)
93 3stack-6ch-dig 3-stack (6-channel) with SPDIF 96 3stack-6ch-dig 3-stack (6-channel) with SPDIF
@@ -106,6 +109,9 @@ ALC662/663
106 asus-mode4 ASUS 109 asus-mode4 ASUS
107 asus-mode5 ASUS 110 asus-mode5 ASUS
108 asus-mode6 ASUS 111 asus-mode6 ASUS
112 dell Dell with ALC272
113 dell-zm1 Dell ZM1 with ALC272
114 samsung-nc10 Samsung NC10 mini notebook
109 auto auto-config reading BIOS (default) 115 auto auto-config reading BIOS (default)
110 116
111ALC882/885 117ALC882/885
@@ -117,6 +123,7 @@ ALC882/885
117 asus-a7j ASUS A7J 123 asus-a7j ASUS A7J
118 asus-a7m ASUS A7M 124 asus-a7m ASUS A7M
119 macpro MacPro support 125 macpro MacPro support
126 mb5 Macbook 5,1
120 mbp3 Macbook Pro rev3 127 mbp3 Macbook Pro rev3
121 imac24 iMac 24'' with jack detection 128 imac24 iMac 24'' with jack detection
122 w2jc ASUS W2JC 129 w2jc ASUS W2JC
@@ -132,10 +139,13 @@ ALC883/888
132 acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc) 139 acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
133 acer-aspire Acer Aspire 9810 140 acer-aspire Acer Aspire 9810
134 acer-aspire-4930g Acer Aspire 4930G 141 acer-aspire-4930g Acer Aspire 4930G
142 acer-aspire-6530g Acer Aspire 6530G
143 acer-aspire-8930g Acer Aspire 8930G
135 medion Medion Laptops 144 medion Medion Laptops
136 medion-md2 Medion MD2 145 medion-md2 Medion MD2
137 targa-dig Targa/MSI 146 targa-dig Targa/MSI
138 targa-2ch-dig Targs/MSI with 2-channel 147 targa-2ch-dig Targa/MSI with 2-channel
148 targa-8ch-dig Targa/MSI with 8-channel (MSI GX620)
139 laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE) 149 laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
140 lenovo-101e Lenovo 101E 150 lenovo-101e Lenovo 101E
141 lenovo-nb0763 Lenovo NB0763 151 lenovo-nb0763 Lenovo NB0763
@@ -149,6 +159,9 @@ ALC883/888
149 fujitsu-pi2515 Fujitsu AMILO Pi2515 159 fujitsu-pi2515 Fujitsu AMILO Pi2515
150 fujitsu-xa3530 Fujitsu AMILO XA3530 160 fujitsu-xa3530 Fujitsu AMILO XA3530
151 3stack-6ch-intel Intel DG33* boards 161 3stack-6ch-intel Intel DG33* boards
162 asus-p5q ASUS P5Q-EM boards
163 mb31 MacBook 3,1
164 sony-vaio-tt Sony VAIO TT
152 auto auto-config reading BIOS (default) 165 auto auto-config reading BIOS (default)
153 166
154ALC861/660 167ALC861/660
@@ -261,6 +274,8 @@ Conexant 5051
261============= 274=============
262 laptop Basic Laptop config (default) 275 laptop Basic Laptop config (default)
263 hp HP Spartan laptop 276 hp HP Spartan laptop
277 hp-dv6736 HP dv6736
278 lenovo-x200 Lenovo X200 laptop
264 279
265STAC9200 280STAC9200
266======== 281========
@@ -278,6 +293,7 @@ STAC9200
278 gateway-m4 Gateway laptops with EAPD control 293 gateway-m4 Gateway laptops with EAPD control
279 gateway-m4-2 Gateway laptops with EAPD control 294 gateway-m4-2 Gateway laptops with EAPD control
280 panasonic Panasonic CF-74 295 panasonic Panasonic CF-74
296 auto BIOS setup (default)
281 297
282STAC9205/9254 298STAC9205/9254
283============= 299=============
@@ -285,6 +301,8 @@ STAC9205/9254
285 dell-m42 Dell (unknown) 301 dell-m42 Dell (unknown)
286 dell-m43 Dell Precision 302 dell-m43 Dell Precision
287 dell-m44 Dell Inspiron 303 dell-m44 Dell Inspiron
304 eapd Keep EAPD on (e.g. Gateway T1616)
305 auto BIOS setup (default)
288 306
289STAC9220/9221 307STAC9220/9221
290============= 308=============
@@ -308,6 +326,7 @@ STAC9220/9221
308 dell-d82 Dell (unknown) 326 dell-d82 Dell (unknown)
309 dell-m81 Dell (unknown) 327 dell-m81 Dell (unknown)
310 dell-m82 Dell XPS M1210 328 dell-m82 Dell XPS M1210
329 auto BIOS setup (default)
311 330
312STAC9202/9250/9251 331STAC9202/9250/9251
313================== 332==================
@@ -319,6 +338,7 @@ STAC9202/9250/9251
319 m3 Some Gateway MX series laptops 338 m3 Some Gateway MX series laptops
320 m5 Some Gateway MX series laptops (MP6954) 339 m5 Some Gateway MX series laptops (MP6954)
321 m6 Some Gateway NX series laptops 340 m6 Some Gateway NX series laptops
341 auto BIOS setup (default)
322 342
323STAC9227/9228/9229/927x 343STAC9227/9228/9229/927x
324======================= 344=======================
@@ -326,8 +346,10 @@ STAC9227/9228/9229/927x
326 ref-no-jd Reference board without HP/Mic jack detection 346 ref-no-jd Reference board without HP/Mic jack detection
327 3stack D965 3stack 347 3stack D965 3stack
328 5stack D965 5stack + SPDIF 348 5stack D965 5stack + SPDIF
349 5stack-no-fp D965 5stack without front panel
329 dell-3stack Dell Dimension E520 350 dell-3stack Dell Dimension E520
330 dell-bios Fixes with Dell BIOS setup 351 dell-bios Fixes with Dell BIOS setup
352 auto BIOS setup (default)
331 353
332STAC92HD71B* 354STAC92HD71B*
333============ 355============
@@ -335,7 +357,11 @@ STAC92HD71B*
335 dell-m4-1 Dell desktops 357 dell-m4-1 Dell desktops
336 dell-m4-2 Dell desktops 358 dell-m4-2 Dell desktops
337 dell-m4-3 Dell desktops 359 dell-m4-3 Dell desktops
338 hp-m4 HP dv laptops 360 hp-m4 HP mini 1000
361 hp-dv5 HP dv series
362 hp-hdx HP HDX series
363 hp-dv4-1222nr HP dv4-1222nr (with LED support)
364 auto BIOS setup (default)
339 365
340STAC92HD73* 366STAC92HD73*
341=========== 367===========
@@ -345,13 +371,16 @@ STAC92HD73*
345 dell-m6-dmic Dell desktops/laptops with digital mics 371 dell-m6-dmic Dell desktops/laptops with digital mics
346 dell-m6 Dell desktops/laptops with both type of mics 372 dell-m6 Dell desktops/laptops with both type of mics
347 dell-eq Dell desktops/laptops 373 dell-eq Dell desktops/laptops
374 auto BIOS setup (default)
348 375
349STAC92HD83* 376STAC92HD83*
350=========== 377===========
351 ref Reference board 378 ref Reference board
352 mic-ref Reference board with power managment for ports 379 mic-ref Reference board with power managment for ports
380 dell-s14 Dell laptop
381 auto BIOS setup (default)
353 382
354STAC9872 383STAC9872
355======== 384========
356 vaio Setup for VAIO FE550G/SZ110 385 vaio VAIO laptop without SPDIF
357 vaio-ar Setup for VAIO AR 386 auto BIOS setup (default)
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index 8d68fff71839..71ac995b1915 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -16,7 +16,7 @@ methods for the HD-audio hardware.
16The HD-audio component consists of two parts: the controller chip and 16The HD-audio component consists of two parts: the controller chip and
17the codec chips on the HD-audio bus. Linux provides a single driver 17the codec chips on the HD-audio bus. Linux provides a single driver
18for all controllers, snd-hda-intel. Although the driver name contains 18for all controllers, snd-hda-intel. Although the driver name contains
19a word of a well-known harware vendor, it's not specific to it but for 19a word of a well-known hardware vendor, it's not specific to it but for
20all controller chips by other companies. Since the HD-audio 20all controller chips by other companies. Since the HD-audio
21controllers are supposed to be compatible, the single snd-hda-driver 21controllers are supposed to be compatible, the single snd-hda-driver
22should work in most cases. But, not surprisingly, there are known 22should work in most cases. But, not surprisingly, there are known
@@ -109,6 +109,13 @@ slot, pass `probe_mask=1`. For the first and the third slots, pass
109Since 2.6.29 kernel, the driver has a more robust probing method, so 109Since 2.6.29 kernel, the driver has a more robust probing method, so
110this error might happen rarely, though. 110this error might happen rarely, though.
111 111
112On a machine with a broken BIOS, sometimes you need to force the
113driver to probe the codec slots the hardware doesn't report for use.
114In such a case, turn the bit 8 (0x100) of `probe_mask` option on.
115Then the rest 8 bits are passed as the codec slots to probe
116unconditionally. For example, `probe_mask=0x103` will force to probe
117the codec slots 0 and 1 no matter what the hardware reports.
118
112 119
113Interrupt Handling 120Interrupt Handling
114~~~~~~~~~~~~~~~~~~ 121~~~~~~~~~~~~~~~~~~
@@ -162,7 +169,7 @@ PCI SSID look-up.
162What `model` option values are available depends on the codec chip. 169What `model` option values are available depends on the codec chip.
163Check your codec chip from the codec proc file (see "Codec Proc-File" 170Check your codec chip from the codec proc file (see "Codec Proc-File"
164section below). It will show the vendor/product name of your codec 171section below). It will show the vendor/product name of your codec
165chip. Then, see Documentation/sound/alsa/HD-Audio-Modelstxt file, 172chip. Then, see Documentation/sound/alsa/HD-Audio-Models.txt file,
166the section of HD-audio driver. You can find a list of codecs 173the section of HD-audio driver. You can find a list of codecs
167and `model` options belonging to each codec. For example, for Realtek 174and `model` options belonging to each codec. For example, for Realtek
168ALC262 codec chip, pass `model=ultra` for devices that are compatible 175ALC262 codec chip, pass `model=ultra` for devices that are compatible
@@ -170,7 +177,7 @@ with Samsung Q1 Ultra.
170 177
171Thus, the first thing you can do for any brand-new, unsupported and 178Thus, the first thing you can do for any brand-new, unsupported and
172non-working HD-audio hardware is to check HD-audio codec and several 179non-working HD-audio hardware is to check HD-audio codec and several
173different `model` option values. If you have a luck, some of them 180different `model` option values. If you have any luck, some of them
174might suit with your device well. 181might suit with your device well.
175 182
176Some codecs such as ALC880 have a special model option `model=test`. 183Some codecs such as ALC880 have a special model option `model=test`.
@@ -358,10 +365,26 @@ modelname::
358 to this file. 365 to this file.
359init_verbs:: 366init_verbs::
360 The extra verbs to execute at initialization. You can add a verb by 367 The extra verbs to execute at initialization. You can add a verb by
361 writing to this file. Pass tree numbers, nid, verb and parameter. 368 writing to this file. Pass three numbers: nid, verb and parameter
369 (separated with a space).
362hints:: 370hints::
363 Shows hint strings for codec parsers for any use. Right now it's 371 Shows / stores hint strings for codec parsers for any use.
364 not used. 372 Its format is `key = value`. For example, passing `hp_detect = yes`
373 to IDT/STAC codec parser will result in the disablement of the
374 headphone detection.
375init_pin_configs::
376 Shows the initial pin default config values set by BIOS.
377driver_pin_configs::
378 Shows the pin default values set by the codec parser explicitly.
379 This doesn't show all pin values but only the changed values by
380 the parser. That is, if the parser doesn't change the pin default
381 config values by itself, this will contain nothing.
382user_pin_configs::
383 Shows the pin default config values to override the BIOS setup.
384 Writing this (with two numbers, NID and value) appends the new
385 value. The given will be used instead of the initial BIOS value at
386 the next reconfiguration time. Note that this config will override
387 even the driver pin configs, too.
365reconfig:: 388reconfig::
366 Triggers the codec re-configuration. When any value is written to 389 Triggers the codec re-configuration. When any value is written to
367 this file, the driver re-initialize and parses the codec tree 390 this file, the driver re-initialize and parses the codec tree
@@ -371,6 +394,14 @@ clear::
371 Resets the codec, removes the mixer elements and PCM stuff of the 394 Resets the codec, removes the mixer elements and PCM stuff of the
372 specified codec, and clear all init verbs and hints. 395 specified codec, and clear all init verbs and hints.
373 396
397For example, when you want to change the pin default configuration
398value of the pin widget 0x14 to 0x9993013f, and let the driver
399re-configure based on that state, run like below:
400------------------------------------------------------------------------
401 # echo 0x14 0x9993013f > /sys/class/sound/hwC0D0/user_pin_configs
402 # echo 1 > /sys/class/sound/hwC0D0/reconfig
403------------------------------------------------------------------------
404
374 405
375Power-Saving 406Power-Saving
376~~~~~~~~~~~~ 407~~~~~~~~~~~~
@@ -461,6 +492,16 @@ run with `--no-upload` option, and attach the generated file.
461There are some other useful options. See `--help` option output for 492There are some other useful options. See `--help` option output for
462details. 493details.
463 494
495When a probe error occurs or when the driver obviously assigns a
496mismatched model, it'd be helpful to load the driver with
497`probe_only=1` option (at best after the cold reboot) and run
498alsa-info at this state. With this option, the driver won't configure
499the mixer and PCM but just tries to probe the codec slot. After
500probing, the proc file is available, so you can get the raw codec
501information before modified by the driver. Of course, the driver
502isn't usable with `probe_only=1`. But you can continue the
503configuration via hwdep sysfs file if hda-reconfig option is enabled.
504
464 505
465hda-verb 506hda-verb
466~~~~~~~~ 507~~~~~~~~
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index bba2dbb79d81..381908d8ca42 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -88,21 +88,34 @@ card*/pcm*/info
88 substreams, etc. 88 substreams, etc.
89 89
90card*/pcm*/xrun_debug 90card*/pcm*/xrun_debug
91 This file appears when CONFIG_SND_DEBUG=y. 91 This file appears when CONFIG_SND_DEBUG=y and
92 This shows the status of xrun (= buffer overrun/xrun) debug of 92 CONFIG_PCM_XRUN_DEBUG=y.
93 ALSA PCM middle layer, as an integer from 0 to 2. The value 93 This shows the status of xrun (= buffer overrun/xrun) and
94 can be changed by writing to this file, such as 94 invalid PCM position debug/check of ALSA PCM middle layer.
95 95 It takes an integer value, can be changed by writing to this
96 # cat 2 > /proc/asound/card0/pcm0p/xrun_debug 96 file, such as
97 97
98 When this value is greater than 0, the driver will show the 98 # cat 5 > /proc/asound/card0/pcm0p/xrun_debug
99 messages to kernel log when an xrun is detected. The debug 99
100 message is shown also when the invalid H/W pointer is detected 100 The value consists of the following bit flags:
101 at the update of periods (usually called from the interrupt 101 bit 0 = Enable XRUN/jiffies debug messages
102 bit 1 = Show stack trace at XRUN / jiffies check
103 bit 2 = Enable additional jiffies check
104
105 When the bit 0 is set, the driver will show the messages to
106 kernel log when an xrun is detected. The debug message is
107 shown also when the invalid H/W pointer is detected at the
108 update of periods (usually called from the interrupt
102 handler). 109 handler).
103 110
104 When this value is greater than 1, the driver will show the 111 When the bit 1 is set, the driver will show the stack trace
105 stack trace additionally. This may help the debugging. 112 additionally. This may help the debugging.
113
114 Since 2.6.30, this option can enable the hwptr check using
115 jiffies. This detects spontaneous invalid pointer callback
116 values, but can be lead to too much corrections for a (mostly
117 buggy) hardware that doesn't give smooth pointer updates.
118 This feature is enabled via the bit 2.
106 119
107card*/pcm*/sub*/info 120card*/pcm*/sub*/info
108 The general information of this PCM sub-stream. 121 The general information of this PCM sub-stream.
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44
new file mode 100644
index 000000000000..0e41576fa13e
--- /dev/null
+++ b/Documentation/sound/alsa/README.maya44
@@ -0,0 +1,163 @@
1NOTE: The following is the original document of Rainer's patch that the
2current maya44 code based on. Some contents might be obsoleted, but I
3keep here as reference -- tiwai
4
5----------------------------------------------------------------
6
7STATE OF DEVELOPMENT:
8
9This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann.
10Development is carried out by Rainer Zimmermann (mail@lightshed.de).
11
12ESI provided a sample Maya44 card for the development work.
13
14However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing.
15
16This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008).
17
18
19The following functions work, as tested by Rainer Zimmermann and Piotr Makowski:
20
21- playback and capture at all sampling rates
22- input/output level
23- crossmixing
24- line/mic switch
25- phantom power switch
26- analogue monitor a.k.a bypass
27
28
29The following functions *should* work, but are not fully tested:
30
31- Channel 3+4 analogue - S/PDIF input switching
32- S/PDIF output
33- all inputs/outputs on the M/IO/DIO extension card
34- internal/external clock selection
35
36
37*In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.*
38
39
40Things that do not seem to work:
41
42- The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code).
43
44- Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down.
45
46
47DRIVER DETAILS:
48
49the following files were added:
50
51pci/ice1724/maya44.c - Maya44 specific code
52pci/ice1724/maya44.h
53pci/ice1724/ice1724.patch
54pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES)
55i2c/other/wm8776.c - low-level access routines for Wolfson WM8776 codecs
56include/wm8776.h
57
58
59Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure.
60This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately.
61
62
63the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree:
64
65wtm.h
66vt1720_mobo.h
67revo.h
68prodigy192.h
69pontis.h
70phase.h
71maya44.h
72juli.h
73aureon.h
74amp.h
75envy24ht.h
76se.h
77prodigy_hifi.h
78
79
80*I hope this is the correct way to do things.*
81
82
83SAMPLING RATES:
84
85The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture.
86
87As the ICE1724 chip only allows one global sampling rate, this is handled as follows:
88
89* setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels.
90
91* In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices.
92
93*AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality.
94
95
96I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic.
97
98The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712).
99
100
101SOUND DEVICES:
102
103PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0):
104
105hw:0,0 input - stereo, analog input 1+2
106hw:0,0 output - stereo, analog output 1+2
107hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input
108hw:0,1 output - stereo, analog output 3+4 (and SPDIF out)
109
110
111NAMING OF MIXER CONTROLS:
112
113(for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software).
114
115
116PCM: (digital) output level for channel 1+2
117PCM 1: same for channel 3+4
118
119Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2.
120 Make sure this is not turned on while any other source is connected to input 1/2.
121 It might damage the source and/or the maya44 card.
122
123Mic/Line input: if switch is is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo).
124
125Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver.
126Bypass 1: same for channel 3+4.
127
128Crossmix: cross-mixer from channels 1+2 to channels 3+4
129Crossmix 1: cross-mixer from channels 3+4 to channels 1+2
130
131IEC958 Output: switch for S/PDIF output.
132 This is not supported by the ESI windows driver.
133 S/PDIF should output the same signal as channel 3+4. [untested!]
134
135
136Digitial output selectors:
137
138 These switches allow a direct digital routing from the ADCs to the DACs.
139 Each switch determines where the digital input data to one of the DACs comes from.
140 They are not supported by the ESI windows driver.
141 For normal operation, they should all be set to "PCM out".
142
143H/W: Output source channel 1
144H/W 1: Output source channel 2
145H/W 2: Output source channel 3
146H/W 3: Output source channel 4
147
148H/W 4 ... H/W 9: unknown function, left in to enable testing.
149 Possibly some of these control S/PDIF output(s).
150 If these turn out to be unused, they will go away in later driver versions.
151
152Selectable values for each of the digital output selectors are:
153 "PCM out" -> DAC output of the corresponding channel (default setting)
154 "Input 1"...
155 "Input 4" -> direct routing from ADC output of the selected input channel
156
157
158--------
159
160Feb 14, 2008
161Rainer Zimmermann
162mail@lightshed.de
163
diff --git a/Documentation/sound/alsa/hda_codec.txt b/Documentation/sound/alsa/hda_codec.txt
index 34e87ec1379c..de8efbc7e4bd 100644
--- a/Documentation/sound/alsa/hda_codec.txt
+++ b/Documentation/sound/alsa/hda_codec.txt
@@ -114,7 +114,7 @@ For writing a sequence of verbs, use snd_hda_sequence_write().
114 114
115There are variants of cached read/write, snd_hda_codec_write_cache(), 115There are variants of cached read/write, snd_hda_codec_write_cache(),
116snd_hda_sequence_write_cache(). These are used for recording the 116snd_hda_sequence_write_cache(). These are used for recording the
117register states for the power-mangement resume. When no PM is needed, 117register states for the power-management resume. When no PM is needed,
118these are equivalent with non-cached version. 118these are equivalent with non-cached version.
119 119
120To retrieve the number of sub nodes connected to the given node, use 120To retrieve the number of sub nodes connected to the given node, use
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 46f9684d0b29..9ac842be9b4f 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:-
62 o Mic - Mic (and optional Jack) 62 o Mic - Mic (and optional Jack)
63 o Line - Line Input/Output (and optional Jack) 63 o Line - Line Input/Output (and optional Jack)
64 o Speaker - Speaker 64 o Speaker - Speaker
65 o Supply - Power or clock supply widget used by other widgets.
65 o Pre - Special PRE widget (exec before all others) 66 o Pre - Special PRE widget (exec before all others)
66 o Post - Special POST widget (exec after all others) 67 o Post - Special POST widget (exec after all others)
67 68
@@ -116,6 +117,9 @@ SOC_DAPM_SINGLE("HiFi Playback Switch", WM8731_APANA, 4, 1, 0),
116SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls, 117SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls,
117 ARRAY_SIZE(wm8731_output_mixer_controls)), 118 ARRAY_SIZE(wm8731_output_mixer_controls)),
118 119
120If you dont want the mixer elements prefixed with the name of the mixer widget,
121you can use SND_SOC_DAPM_MIXER_NAMED_CTL instead. the parameters are the same
122as for SND_SOC_DAPM_MIXER.
119 123
1202.3 Platform/Machine domain Widgets 1242.3 Platform/Machine domain Widgets
121----------------------------------- 125-----------------------------------
diff --git a/Documentation/sound/alsa/soc/jack.txt b/Documentation/sound/alsa/soc/jack.txt
new file mode 100644
index 000000000000..fcf82a417293
--- /dev/null
+++ b/Documentation/sound/alsa/soc/jack.txt
@@ -0,0 +1,71 @@
1ASoC jack detection
2===================
3
4ALSA has a standard API for representing physical jacks to user space,
5the kernel side of which can be seen in include/sound/jack.h. ASoC
6provides a version of this API adding two additional features:
7
8 - It allows more than one jack detection method to work together on one
9 user visible jack. In embedded systems it is common for multiple
10 to be present on a single jack but handled by separate bits of
11 hardware.
12
13 - Integration with DAPM, allowing DAPM endpoints to be updated
14 automatically based on the detected jack status (eg, turning off the
15 headphone outputs if no headphones are present).
16
17This is done by splitting the jacks up into three things working
18together: the jack itself represented by a struct snd_soc_jack, sets of
19snd_soc_jack_pins representing DAPM endpoints to update and blocks of
20code providing jack reporting mechanisms.
21
22For example, a system may have a stereo headset jack with two reporting
23mechanisms, one for the headphone and one for the microphone. Some
24systems won't be able to use their speaker output while a headphone is
25connected and so will want to make sure to update both speaker and
26headphone when the headphone jack status changes.
27
28The jack - struct snd_soc_jack
29==============================
30
31This represents a physical jack on the system and is what is visible to
32user space. The jack itself is completely passive, it is set up by the
33machine driver and updated by jack detection methods.
34
35Jacks are created by the machine driver calling snd_soc_jack_new().
36
37snd_soc_jack_pin
38================
39
40These represent a DAPM pin to update depending on some of the status
41bits supported by the jack. Each snd_soc_jack has zero or more of these
42which are updated automatically. They are created by the machine driver
43and associated with the jack using snd_soc_jack_add_pins(). The status
44of the endpoint may configured to be the opposite of the jack status if
45required (eg, enabling a built in microphone if a microphone is not
46connected via a jack).
47
48Jack detection methods
49======================
50
51Actual jack detection is done by code which is able to monitor some
52input to the system and update a jack by calling snd_soc_jack_report(),
53specifying a subset of bits to update. The jack detection code should
54be set up by the machine driver, taking configuration for the jack to
55update and the set of things to report when the jack is connected.
56
57Often this is done based on the status of a GPIO - a handler for this is
58provided by the snd_soc_jack_add_gpio() function. Other methods are
59also available, for example integrated into CODECs. One example of
60CODEC integrated jack detection can be see in the WM8350 driver.
61
62Each jack may have multiple reporting mechanisms, though it will need at
63least one to be useful.
64
65Machine drivers
66===============
67
68These are all hooked together by the machine driver depending on the
69system hardware. The machine driver will set up the snd_soc_jack and
70the list of pins to update then set up one or more jack detection
71mechanisms to update that jack based on their current status.
diff --git a/Documentation/sound/oss/CS4232 b/Documentation/sound/oss/CS4232
deleted file mode 100644
index 7d6af7a5c1c2..000000000000
--- a/Documentation/sound/oss/CS4232
+++ /dev/null
@@ -1,23 +0,0 @@
1To configure the Crystal CS423x sound chip and activate its DSP functions,
2modules may be loaded in this order:
3
4 modprobe sound
5 insmod ad1848
6 insmod uart401
7 insmod cs4232 io=* irq=* dma=* dma2=*
8
9This is the meaning of the parameters:
10
11 io--I/O address of the Windows Sound System (normally 0x534)
12 irq--IRQ of this device
13 dma and dma2--DMA channels (DMA2 may be 0)
14
15On some cards, the board attempts to do non-PnP setup, and fails. If you
16have problems, use Linux' PnP facilities.
17
18To get MIDI facilities add
19
20 insmod opl3 io=*
21
22where "io" is the I/O address of the OPL3 synthesizer. This will be shown
23in /proc/sys/pnp and is normally 0x388.
diff --git a/Documentation/sound/oss/Introduction b/Documentation/sound/oss/Introduction
index f04ba6bb7395..75d967ff9266 100644
--- a/Documentation/sound/oss/Introduction
+++ b/Documentation/sound/oss/Introduction
@@ -80,7 +80,7 @@ Notes:
80 additional features. 80 additional features.
81 81
822. The commercial OSS driver may be obtained from the site: 822. The commercial OSS driver may be obtained from the site:
83 http://www/opensound.com. This may be used for cards that 83 http://www.opensound.com. This may be used for cards that
84 are unsupported by the kernel driver, or may be used 84 are unsupported by the kernel driver, or may be used
85 by other operating systems. 85 by other operating systems.
86 86
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
index 42f43fa59f24..34c76a55bc04 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/sparse.txt
@@ -42,6 +42,14 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
42vs cpu-endian vs whatever), and there the constant "0" really _is_ 42vs cpu-endian vs whatever), and there the constant "0" really _is_
43special. 43special.
44 44
45__bitwise__ - to be used for relatively compact stuff (gfp_t, etc.) that
46is mostly warning-free and is supposed to stay that way. Warnings will
47be generated without __CHECK_ENDIAN__.
48
49__bitwise - noisy stuff; in particular, __le*/__be* are that. We really
50don't want to drown in noise unless we'd explicitly asked for it.
51
52
45Getting sparse 53Getting sparse
46~~~~~~~~~~~~~~ 54~~~~~~~~~~~~~~
47 55
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index 0f5122eb282b..4a02d2508bc8 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -511,10 +511,16 @@ SPI MASTER METHODS
511 This sets up the device clock rate, SPI mode, and word sizes. 511 This sets up the device clock rate, SPI mode, and word sizes.
512 Drivers may change the defaults provided by board_info, and then 512 Drivers may change the defaults provided by board_info, and then
513 call spi_setup(spi) to invoke this routine. It may sleep. 513 call spi_setup(spi) to invoke this routine. It may sleep.
514
514 Unless each SPI slave has its own configuration registers, don't 515 Unless each SPI slave has its own configuration registers, don't
515 change them right away ... otherwise drivers could corrupt I/O 516 change them right away ... otherwise drivers could corrupt I/O
516 that's in progress for other SPI devices. 517 that's in progress for other SPI devices.
517 518
519 ** BUG ALERT: for some reason the first version of
520 ** many spi_master drivers seems to get this wrong.
521 ** When you code setup(), ASSUME that the controller
522 ** is actively processing transfers for another device.
523
518 master->transfer(struct spi_device *spi, struct spi_message *message) 524 master->transfer(struct spi_device *spi, struct spi_message *message)
519 This must not sleep. Its responsibility is arrange that the 525 This must not sleep. Its responsibility is arrange that the
520 transfer happens and its complete() callback is issued. The two 526 transfer happens and its complete() callback is issued. The two
diff --git a/Documentation/sysctl/00-INDEX b/Documentation/sysctl/00-INDEX
index a20a9066dc4c..1286f455992f 100644
--- a/Documentation/sysctl/00-INDEX
+++ b/Documentation/sysctl/00-INDEX
@@ -10,6 +10,8 @@ fs.txt
10 - documentation for /proc/sys/fs/*. 10 - documentation for /proc/sys/fs/*.
11kernel.txt 11kernel.txt
12 - documentation for /proc/sys/kernel/*. 12 - documentation for /proc/sys/kernel/*.
13net.txt
14 - documentation for /proc/sys/net/*.
13sunrpc.txt 15sunrpc.txt
14 - documentation for /proc/sys/sunrpc/*. 16 - documentation for /proc/sys/sunrpc/*.
15vm.txt 17vm.txt
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index f99254327ae5..1458448436cc 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -1,5 +1,6 @@
1Documentation for /proc/sys/fs/* kernel version 2.2.10 1Documentation for /proc/sys/fs/* kernel version 2.2.10
2 (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> 2 (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
3 (c) 2009, Shen Feng<shen@cn.fujitsu.com>
3 4
4For general info and legal blurb, please look in README. 5For general info and legal blurb, please look in README.
5 6
@@ -14,7 +15,12 @@ kernel. Since some of the files _can_ be used to screw up your
14system, it is advisable to read both documentation and source 15system, it is advisable to read both documentation and source
15before actually making adjustments. 16before actually making adjustments.
16 17
181. /proc/sys/fs
19----------------------------------------------------------
20
17Currently, these files are in /proc/sys/fs: 21Currently, these files are in /proc/sys/fs:
22- aio-max-nr
23- aio-nr
18- dentry-state 24- dentry-state
19- dquot-max 25- dquot-max
20- dquot-nr 26- dquot-nr
@@ -30,8 +36,15 @@ Currently, these files are in /proc/sys/fs:
30- super-max 36- super-max
31- super-nr 37- super-nr
32 38
33Documentation for the files in /proc/sys/fs/binfmt_misc is 39==============================================================
34in Documentation/binfmt_misc.txt. 40
41aio-nr & aio-max-nr:
42
43aio-nr is the running total of the number of events specified on the
44io_setup system call for all currently active aio contexts. If aio-nr
45reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
46raising aio-max-nr does not result in the pre-allocation or re-sizing
47of any kernel data structures.
35 48
36============================================================== 49==============================================================
37 50
@@ -178,3 +191,60 @@ requests. aio-max-nr allows you to change the maximum value
178aio-nr can grow to. 191aio-nr can grow to.
179 192
180============================================================== 193==============================================================
194
195
1962. /proc/sys/fs/binfmt_misc
197----------------------------------------------------------
198
199Documentation for the files in /proc/sys/fs/binfmt_misc is
200in Documentation/binfmt_misc.txt.
201
202
2033. /proc/sys/fs/mqueue - POSIX message queues filesystem
204----------------------------------------------------------
205
206The "mqueue" filesystem provides the necessary kernel features to enable the
207creation of a user space library that implements the POSIX message queues
208API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
209Interfaces specification.)
210
211The "mqueue" filesystem contains values for determining/setting the amount of
212resources used by the file system.
213
214/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
215maximum number of message queues allowed on the system.
216
217/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
218maximum number of messages in a queue value. In fact it is the limiting value
219for another (user) limit which is set in mq_open invocation. This attribute of
220a queue must be less or equal then msg_max.
221
222/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
223maximum message size value (it is every message queue's attribute set during
224its creation).
225
226
2274. /proc/sys/fs/epoll - Configuration options for the epoll interface
228--------------------------------------------------------
229
230This directory contains configuration options for the epoll(7) interface.
231
232max_user_instances
233------------------
234
235This is the maximum number of epoll file descriptors that a single user can
236have open at a given time. The default value is 128, and should be enough
237for normal users.
238
239max_user_watches
240----------------
241
242Every epoll file descriptor can store a number of files to be monitored
243for event readiness. Each one of these monitored files constitutes a "watch".
244This configuration option sets the maximum number of "watches" that are
245allowed for each user.
246Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
247on a 64bit one.
248The current default value for max_user_watches is the 1/32 of the available
249low memory, divided for the "watch" cost in bytes.
250
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a4ccdd1981cf..322a00bb99d9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -1,5 +1,6 @@
1Documentation for /proc/sys/kernel/* kernel version 2.2.10 1Documentation for /proc/sys/kernel/* kernel version 2.2.10
2 (c) 1998, 1999, Rik van Riel <riel@nl.linux.org> 2 (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
3 (c) 2009, Shen Feng<shen@cn.fujitsu.com>
3 4
4For general info and legal blurb, please look in README. 5For general info and legal blurb, please look in README.
5 6
@@ -18,6 +19,7 @@ Currently, these files might (depending on your configuration)
18show up in /proc/sys/kernel: 19show up in /proc/sys/kernel:
19- acpi_video_flags 20- acpi_video_flags
20- acct 21- acct
22- auto_msgmni
21- core_pattern 23- core_pattern
22- core_uses_pid 24- core_uses_pid
23- ctrl-alt-del 25- ctrl-alt-del
@@ -30,9 +32,11 @@ show up in /proc/sys/kernel:
30- kstack_depth_to_print [ X86 only ] 32- kstack_depth_to_print [ X86 only ]
31- l2cr [ PPC only ] 33- l2cr [ PPC only ]
32- modprobe ==> Documentation/debugging-modules.txt 34- modprobe ==> Documentation/debugging-modules.txt
35- modules_disabled
33- msgmax 36- msgmax
34- msgmnb 37- msgmnb
35- msgmni 38- msgmni
39- nmi_watchdog
36- osrelease 40- osrelease
37- ostype 41- ostype
38- overflowgid 42- overflowgid
@@ -40,6 +44,7 @@ show up in /proc/sys/kernel:
40- panic 44- panic
41- pid_max 45- pid_max
42- powersave-nap [ PPC only ] 46- powersave-nap [ PPC only ]
47- panic_on_unrecovered_nmi
43- printk 48- printk
44- randomize_va_space 49- randomize_va_space
45- real-root-dev ==> Documentation/initrd.txt 50- real-root-dev ==> Documentation/initrd.txt
@@ -55,6 +60,7 @@ show up in /proc/sys/kernel:
55- sysrq ==> Documentation/sysrq.txt 60- sysrq ==> Documentation/sysrq.txt
56- tainted 61- tainted
57- threads-max 62- threads-max
63- unknown_nmi_panic
58- version 64- version
59 65
60============================================================== 66==============================================================
@@ -179,6 +185,16 @@ kernel stack.
179 185
180============================================================== 186==============================================================
181 187
188modules_disabled:
189
190A toggle value indicating if modules are allowed to be loaded
191in an otherwise modular kernel. This toggle defaults to off
192(0), but can be set true (1). Once true, modules can be
193neither loaded nor unloaded, and the toggle cannot be set back
194to false.
195
196==============================================================
197
182osrelease, ostype & version: 198osrelease, ostype & version:
183 199
184# cat osrelease 200# cat osrelease
@@ -381,3 +397,51 @@ can be ORed together:
381 512 - A kernel warning has occurred. 397 512 - A kernel warning has occurred.
3821024 - A module from drivers/staging was loaded. 3981024 - A module from drivers/staging was loaded.
383 399
400==============================================================
401
402auto_msgmni:
403
404Enables/Disables automatic recomputing of msgmni upon memory add/remove or
405upon ipc namespace creation/removal (see the msgmni description above).
406Echoing "1" into this file enables msgmni automatic recomputing.
407Echoing "0" turns it off.
408auto_msgmni default value is 1.
409
410==============================================================
411
412nmi_watchdog:
413
414Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
415the NMI watchdog is enabled and will continuously test all online cpus to
416determine whether or not they are still functioning properly. Currently,
417passing "nmi_watchdog=" parameter at boot time is required for this function
418to work.
419
420If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
421NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
422oprofile may have more registers to utilize.
423
424==============================================================
425
426unknown_nmi_panic:
427
428The value in this file affects behavior of handling NMI. When the value is
429non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
430debugging information is displayed on console.
431
432NMI switch that most IA32 servers have fires unknown NMI up, for example.
433If a system hangs up, try pressing the NMI switch.
434
435==============================================================
436
437panic_on_unrecovered_nmi:
438
439The default Linux behaviour on an NMI of either memory or unknown is to continue
440operation. For many environments such as scientific computing it is preferable
441that the box is taken out and the error dealt with than an uncorrected
442parity/ECC error get propogated.
443
444A small number of systems do generate NMI's for bizarre random reasons such as
445power management so the default is off. That sysctl works like the existing
446panic controls already in that directory.
447
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
new file mode 100644
index 000000000000..df38ef046f8d
--- /dev/null
+++ b/Documentation/sysctl/net.txt
@@ -0,0 +1,175 @@
1Documentation for /proc/sys/net/* kernel version 2.4.0-test11-pre4
2 (c) 1999 Terrehon Bowden <terrehon@pacbell.net>
3 Bodo Bauer <bb@ricochet.net>
4 (c) 2000 Jorge Nerin <comandante@zaralinux.com>
5 (c) 2009 Shen Feng <shen@cn.fujitsu.com>
6
7For general info and legal blurb, please look in README.
8
9==============================================================
10
11This file contains the documentation for the sysctl files in
12/proc/sys/net and is valid for Linux kernel version 2.4.0-test11-pre4.
13
14The interface to the networking parts of the kernel is located in
15/proc/sys/net. The following table shows all possible subdirectories.You may
16see only some of them, depending on your kernel's configuration.
17
18
19Table : Subdirectories in /proc/sys/net
20..............................................................................
21 Directory Content Directory Content
22 core General parameter appletalk Appletalk protocol
23 unix Unix domain sockets netrom NET/ROM
24 802 E802 protocol ax25 AX25
25 ethernet Ethernet protocol rose X.25 PLP layer
26 ipv4 IP version 4 x25 X.25 protocol
27 ipx IPX token-ring IBM token ring
28 bridge Bridging decnet DEC net
29 ipv6 IP version 6
30..............................................................................
31
321. /proc/sys/net/core - Network core options
33-------------------------------------------------------
34
35rmem_default
36------------
37
38The default setting of the socket receive buffer in bytes.
39
40rmem_max
41--------
42
43The maximum receive socket buffer size in bytes.
44
45wmem_default
46------------
47
48The default setting (in bytes) of the socket send buffer.
49
50wmem_max
51--------
52
53The maximum send socket buffer size in bytes.
54
55message_burst and message_cost
56------------------------------
57
58These parameters are used to limit the warning messages written to the kernel
59log from the networking code. They enforce a rate limit to make a
60denial-of-service attack impossible. A higher message_cost factor, results in
61fewer messages that will be written. Message_burst controls when messages will
62be dropped. The default settings limit warning messages to one every five
63seconds.
64
65warnings
66--------
67
68This controls console messages from the networking stack that can occur because
69of problems on the network like duplicate address or bad checksums. Normally,
70this should be enabled, but if the problem persists the messages can be
71disabled.
72
73netdev_budget
74-------------
75
76Maximum number of packets taken from all interfaces in one polling cycle (NAPI
77poll). In one polling cycle interfaces which are registered to polling are
78probed in a round-robin manner. The limit of packets in one such probe can be
79set per-device via sysfs class/net/<device>/weight .
80
81netdev_max_backlog
82------------------
83
84Maximum number of packets, queued on the INPUT side, when the interface
85receives packets faster than kernel can process them.
86
87optmem_max
88----------
89
90Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
91of struct cmsghdr structures with appended data.
92
932. /proc/sys/net/unix - Parameters for Unix domain sockets
94-------------------------------------------------------
95
96There is only one file in this directory.
97unix_dgram_qlen limits the max number of datagrams queued in Unix domain
98socket's buffer. It will not take effect unless PF_UNIX flag is specified.
99
100
1013. /proc/sys/net/ipv4 - IPV4 settings
102-------------------------------------------------------
103Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
104descriptions of these entries.
105
106
1074. Appletalk
108-------------------------------------------------------
109
110The /proc/sys/net/appletalk directory holds the Appletalk configuration data
111when Appletalk is loaded. The configurable parameters are:
112
113aarp-expiry-time
114----------------
115
116The amount of time we keep an ARP entry before expiring it. Used to age out
117old hosts.
118
119aarp-resolve-time
120-----------------
121
122The amount of time we will spend trying to resolve an Appletalk address.
123
124aarp-retransmit-limit
125---------------------
126
127The number of times we will retransmit a query before giving up.
128
129aarp-tick-time
130--------------
131
132Controls the rate at which expires are checked.
133
134The directory /proc/net/appletalk holds the list of active Appletalk sockets
135on a machine.
136
137The fields indicate the DDP type, the local address (in network:node format)
138the remote address, the size of the transmit pending queue, the size of the
139received queue (bytes waiting for applications to read) the state and the uid
140owning the socket.
141
142/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
143shows the name of the interface, its Appletalk address, the network range on
144that address (or network number for phase 1 networks), and the status of the
145interface.
146
147/proc/net/atalk_route lists each known network route. It lists the target
148(network) that the route leads to, the router (may be directly connected), the
149route flags, and the device the route is using.
150
151
1525. IPX
153-------------------------------------------------------
154
155The IPX protocol has no tunable values in proc/sys/net.
156
157The IPX protocol does, however, provide proc/net/ipx. This lists each IPX
158socket giving the local and remote addresses in Novell format (that is
159network:node:port). In accordance with the strange Novell tradition,
160everything but the port is in hex. Not_Connected is displayed for sockets that
161are not tied to a specific remote address. The Tx and Rx queue sizes indicate
162the number of bytes pending for transmission and reception. The state
163indicates the state the socket is in and the uid is the owning uid of the
164socket.
165
166The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
167it gives the network number, the node number, and indicates if the network is
168the primary network. It also indicates which device it is bound to (or
169Internal for internal networks) and the Frame Type if appropriate. Linux
170supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
171IPX.
172
173The /proc/net/ipx_route table holds a list of IPX routes. For each route it
174gives the destination network, the router node (or Directly) and the network
175address of the router (or Connected) for internal networks.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 3197fc83bc51..c4de6359d440 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -88,6 +88,10 @@ will itself start writeback.
88If dirty_bytes is written, dirty_ratio becomes a function of its value 88If dirty_bytes is written, dirty_ratio becomes a function of its value
89(dirty_bytes / the amount of dirtyable system memory). 89(dirty_bytes / the amount of dirtyable system memory).
90 90
91Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
92value lower than this limit will be ignored and the old configuration will be
93retained.
94
91============================================================== 95==============================================================
92 96
93dirty_expire_centisecs 97dirty_expire_centisecs
@@ -229,8 +233,8 @@ These protections are added to score to judge whether this zone should be used
229for page allocation or should be reclaimed. 233for page allocation or should be reclaimed.
230 234
231In this example, if normal pages (index=2) are required to this DMA zone and 235In this example, if normal pages (index=2) are required to this DMA zone and
232pages_high is used for watermark, the kernel judges this zone should not be 236watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
233used because pages_free(1355) is smaller than watermark + protection[2] 237not be used because pages_free(1355) is smaller than watermark + protection[2]
234(4 + 2004 = 2008). If this protection value is 0, this zone would be used for 238(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
235normal page requirement. If requirement is DMA zone(index=0), protection[0] 239normal page requirement. If requirement is DMA zone(index=0), protection[0]
236(=0) is used. 240(=0) is used.
@@ -276,9 +280,10 @@ The default value is 65536.
276min_free_kbytes: 280min_free_kbytes:
277 281
278This is used to force the Linux VM to keep a minimum number 282This is used to force the Linux VM to keep a minimum number
279of kilobytes free. The VM uses this number to compute a pages_min 283of kilobytes free. The VM uses this number to compute a
280value for each lowmem zone in the system. Each lowmem zone gets 284watermark[WMARK_MIN] value for each lowmem zone in the system.
281a number of reserved free pages based proportionally on its size. 285Each lowmem zone gets a number of reserved free pages based
286proportionally on its size.
282 287
283Some minimal amount of memory is needed to satisfy PF_MEMALLOC 288Some minimal amount of memory is needed to satisfy PF_MEMALLOC
284allocations; if you set this to lower than 1024KB, your system will 289allocations; if you set this to lower than 1024KB, your system will
@@ -310,10 +315,14 @@ min_unmapped_ratio:
310 315
311This is available only on NUMA kernels. 316This is available only on NUMA kernels.
312 317
313A percentage of the total pages in each zone. Zone reclaim will only 318This is a percentage of the total pages in each zone. Zone reclaim will
314occur if more than this percentage of pages are file backed and unmapped. 319only occur if more than this percentage of pages are in a state that
315This is to insure that a minimal amount of local pages is still available for 320zone_reclaim_mode allows to be reclaimed.
316file I/O even if the node is overallocated. 321
322If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
323against all file-backed unmapped pages including swapcache pages and tmpfs
324files. Otherwise, only unmapped pages backed by normal files but not tmpfs
325files and similar are considered.
317 326
318The default is 1 percent. 327The default is 1 percent.
319 328
@@ -354,7 +363,7 @@ nr_pdflush_threads
354The current number of pdflush threads. This value is read-only. 363The current number of pdflush threads. This value is read-only.
355The value changes according to the number of dirty pages in the system. 364The value changes according to the number of dirty pages in the system.
356 365
357When neccessary, additional pdflush threads are created, one per second, up to 366When necessary, additional pdflush threads are created, one per second, up to
358nr_pdflush_threads_max. 367nr_pdflush_threads_max.
359 368
360============================================================== 369==============================================================
@@ -561,7 +570,7 @@ swappiness
561 570
562This control is used to define how aggressive the kernel will swap 571This control is used to define how aggressive the kernel will swap
563memory pages. Higher values will increase agressiveness, lower values 572memory pages. Higher values will increase agressiveness, lower values
564descrease the amount of swap. 573decrease the amount of swap.
565 574
566The default value is 60. 575The default value is 60.
567 576
diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt
index 6049a2a84dda..5d8bc2cd250c 100644
--- a/Documentation/sysfs-rules.txt
+++ b/Documentation/sysfs-rules.txt
@@ -113,7 +113,7 @@ versions of the sysfs interface.
113 "devices" directory at /sys/subsystem/<name>/devices. 113 "devices" directory at /sys/subsystem/<name>/devices.
114 114
115 If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be 115 If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be
116 ignored. If it does not exist, you have always to scan all three 116 ignored. If it does not exist, you always have to scan all three
117 places, as the kernel is free to move a subsystem from one place to 117 places, as the kernel is free to move a subsystem from one place to
118 the other, as long as the devices are still reachable by the same 118 the other, as long as the devices are still reachable by the same
119 subsystem name. 119 subsystem name.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718afb..cf42b820ff9d 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -81,6 +81,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
81 81
82'i' - Send a SIGKILL to all processes, except for init. 82'i' - Send a SIGKILL to all processes, except for init.
83 83
84'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl.
85
84'k' - Secure Access Key (SAK) Kills all programs on the current virtual 86'k' - Secure Access Key (SAK) Kills all programs on the current virtual
85 console. NOTE: See important comments below in SAK section. 87 console. NOTE: See important comments below in SAK section.
86 88
@@ -113,6 +115,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
113 115
114'x' - Used by xmon interface on ppc/powerpc platforms. 116'x' - Used by xmon interface on ppc/powerpc platforms.
115 117
118'z' - Dump the ftrace buffer
119
116'0'-'9' - Sets the console log level, controlling which kernel messages 120'0'-'9' - Sets the console log level, controlling which kernel messages
117 will be printed to your console. ('0', for example would make 121 will be printed to your console. ('0', for example would make
118 it so that only emergency messages like PANICs or OOPSes would 122 it so that only emergency messages like PANICs or OOPSes would
@@ -160,6 +164,9 @@ t'E'rm and k'I'll are useful if you have some sort of runaway process you
160are unable to kill any other way, especially if it's spawning other 164are unable to kill any other way, especially if it's spawning other
161processes. 165processes.
162 166
167"'J'ust thaw it" is useful if your system becomes unresponsive due to a frozen
168(probably root) filesystem via the FIFREEZE ioctl.
169
163* Sometimes SysRq seems to get 'stuck' after using it, what can I do? 170* Sometimes SysRq seems to get 'stuck' after using it, what can I do?
164~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 171~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
165That happens to me, also. I've found that tapping shift, alt, and control 172That happens to me, also. I've found that tapping shift, alt, and control
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
index e7c09abcfab4..04763a325520 100644
--- a/Documentation/timers/hpet.txt
+++ b/Documentation/timers/hpet.txt
@@ -7,7 +7,7 @@ by Intel and Microsoft which can be found at
7 7
8Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") 8Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
9and up to 32 comparators. Normally three or more comparators are provided, 9and up to 32 comparators. Normally three or more comparators are provided,
10each of which can generate oneshot interupts and at least one of which has 10each of which can generate oneshot interrupts and at least one of which has
11additional hardware to support periodic interrupts. The comparators are 11additional hardware to support periodic interrupts. The comparators are
12also called "timers", which can be misleading since usually timers are 12also called "timers", which can be misleading since usually timers are
13independent of each other ... these share a counter, complicating resets. 13independent of each other ... these share a counter, complicating resets.
diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt
index 20d368c59814..9bd00fc2e823 100644
--- a/Documentation/timers/timer_stats.txt
+++ b/Documentation/timers/timer_stats.txt
@@ -62,7 +62,7 @@ Timerstats sample period: 3.888770 s
62 62
63The first column is the number of events, the second column the pid, the third 63The first column is the number of events, the second column the pid, the third
64column is the name of the process. The forth column shows the function which 64column is the name of the process. The forth column shows the function which
65initialized the timer and in parantheses the callback function which was 65initialized the timer and in parenthesis the callback function which was
66executed on expiry. 66executed on expiry.
67 67
68 Thomas, Ingo 68 Thomas, Ingo
diff --git a/Documentation/tomoyo.txt b/Documentation/tomoyo.txt
new file mode 100644
index 000000000000..b3a232cae7f8
--- /dev/null
+++ b/Documentation/tomoyo.txt
@@ -0,0 +1,55 @@
1--- What is TOMOYO? ---
2
3TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
4
5LiveCD-based tutorials are available at
6http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/
7http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ .
8Though these tutorials use non-LSM version of TOMOYO, they are useful for you
9to know what TOMOYO is.
10
11--- How to enable TOMOYO? ---
12
13Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on
14kernel's command line.
15
16Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details.
17
18--- Where is documentation? ---
19
20User <-> Kernel interface documentation is available at
21http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html .
22
23Materials we prepared for seminars and symposiums are available at
24http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
25Below lists are chosen from three aspects.
26
27What is TOMOYO?
28 TOMOYO Linux Overview
29 http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf
30 TOMOYO Linux: pragmatic and manageable security for Linux
31 http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
32 TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
33 http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
34
35What can TOMOYO do?
36 Deep inside TOMOYO Linux
37 http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
38 The role of "pathname based access control" in security.
39 http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf
40
41History of TOMOYO?
42 Realities of Mainlining
43 http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf
44
45--- What is future plan? ---
46
47We believe that inode based security and name based security are complementary
48and both should be used together. But unfortunately, so far, we cannot enable
49multiple LSM modules at the same time. We feel sorry that you have to give up
50SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
51
52We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
53version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ .
54LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
55to port non-LSM version's functionalities to LSM versions.
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 000000000000..f157d7594ea7
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,90 @@
1 Event Tracing
2
3 Documentation written by Theodore Ts'o
4 Updated by Li Zefan
5
61. Introduction
7===============
8
9Tracepoints (see Documentation/trace/tracepoints.txt) can be used
10without creating custom kernel modules to register probe functions
11using the event tracing infrastructure.
12
13Not all tracepoints can be traced using the event tracing system;
14the kernel developer must provide code snippets which define how the
15tracing information is saved into the tracing buffer, and how the
16tracing information should be printed.
17
182. Using Event Tracing
19======================
20
212.1 Via the 'set_event' interface
22---------------------------------
23
24The events which are available for tracing can be found in the file
25/debug/tracing/available_events.
26
27To enable a particular event, such as 'sched_wakeup', simply echo it
28to /debug/tracing/set_event. For example:
29
30 # echo sched_wakeup >> /debug/tracing/set_event
31
32[ Note: '>>' is necessary, otherwise it will firstly disable
33 all the events. ]
34
35To disable an event, echo the event name to the set_event file prefixed
36with an exclamation point:
37
38 # echo '!sched_wakeup' >> /debug/tracing/set_event
39
40To disable all events, echo an empty line to the set_event file:
41
42 # echo > /debug/tracing/set_event
43
44To enable all events, echo '*:*' or '*:' to the set_event file:
45
46 # echo *:* > /debug/tracing/set_event
47
48The events are organized into subsystems, such as ext4, irq, sched,
49etc., and a full event name looks like this: <subsystem>:<event>. The
50subsystem name is optional, but it is displayed in the available_events
51file. All of the events in a subsystem can be specified via the syntax
52"<subsystem>:*"; for example, to enable all irq events, you can use the
53command:
54
55 # echo 'irq:*' > /debug/tracing/set_event
56
572.2 Via the 'enable' toggle
58---------------------------
59
60The events available are also listed in /debug/tracing/events/ hierarchy
61of directories.
62
63To enable event 'sched_wakeup':
64
65 # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
66
67To disable it:
68
69 # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
70
71To enable all events in sched subsystem:
72
73 # echo 1 > /debug/tracing/events/sched/enable
74
75To eanble all events:
76
77 # echo 1 > /debug/tracing/events/enable
78
79When reading one of these enable files, there are four results:
80
81 0 - all events this file affects are disabled
82 1 - all events this file affects are enabled
83 X - there is a mixture of events enabled and disabled
84 ? - this file does not affect any event
85
863. Defining an event-enabled tracepoint
87=======================================
88
89See The example provided in samples/trace_events
90
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
new file mode 100644
index 000000000000..a39b3c749de5
--- /dev/null
+++ b/Documentation/trace/ftrace.txt
@@ -0,0 +1,1888 @@
1 ftrace - Function Tracer
2 ========================
3
4Copyright 2008 Red Hat Inc.
5 Author: Steven Rostedt <srostedt@redhat.com>
6 License: The GNU Free Documentation License, Version 1.2
7 (dual licensed under the GPL v2)
8Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton,
9 John Kacur, and David Teigland.
10Written for: 2.6.28-rc2
11
12Introduction
13------------
14
15Ftrace is an internal tracer designed to help out developers and
16designers of systems to find what is going on inside the kernel.
17It can be used for debugging or analyzing latencies and
18performance issues that take place outside of user-space.
19
20Although ftrace is the function tracer, it also includes an
21infrastructure that allows for other types of tracing. Some of
22the tracers that are currently in ftrace include a tracer to
23trace context switches, the time it takes for a high priority
24task to run after it was woken up, the time interrupts are
25disabled, and more (ftrace allows for tracer plugins, which
26means that the list of tracers can always grow).
27
28
29The File System
30---------------
31
32Ftrace uses the debugfs file system to hold the control files as
33well as the files to display output.
34
35When debugfs is configured into the kernel (which selecting any ftrace
36option will do) the directory /sys/kernel/debug will be created. To mount
37this directory, you can add to your /etc/fstab file:
38
39 debugfs /sys/kernel/debug debugfs defaults 0 0
40
41Or you can mount it at run time with:
42
43 mount -t debugfs nodev /sys/kernel/debug
44
45For quicker access to that directory you may want to make a soft link to
46it:
47
48 ln -s /sys/kernel/debug /debug
49
50Any selected ftrace option will also create a directory called tracing
51within the debugfs. The rest of the document will assume that you are in
52the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate
53on the files within that directory and not distract from the content with
54the extended "/sys/kernel/debug/tracing" path name.
55
56That's it! (assuming that you have ftrace configured into your kernel)
57
58After mounting the debugfs, you can see a directory called
59"tracing". This directory contains the control and output files
60of ftrace. Here is a list of some of the key files:
61
62
63 Note: all time values are in microseconds.
64
65 current_tracer:
66
67 This is used to set or display the current tracer
68 that is configured.
69
70 available_tracers:
71
72 This holds the different types of tracers that
73 have been compiled into the kernel. The
74 tracers listed here can be configured by
75 echoing their name into current_tracer.
76
77 tracing_enabled:
78
79 This sets or displays whether the current_tracer
80 is activated and tracing or not. Echo 0 into this
81 file to disable the tracer or 1 to enable it.
82
83 trace:
84
85 This file holds the output of the trace in a human
86 readable format (described below).
87
88 latency_trace:
89
90 This file shows the same trace but the information
91 is organized more to display possible latencies
92 in the system (described below).
93
94 trace_pipe:
95
96 The output is the same as the "trace" file but this
97 file is meant to be streamed with live tracing.
98 Reads from this file will block until new data
99 is retrieved. Unlike the "trace" and "latency_trace"
100 files, this file is a consumer. This means reading
101 from this file causes sequential reads to display
102 more current data. Once data is read from this
103 file, it is consumed, and will not be read
104 again with a sequential read. The "trace" and
105 "latency_trace" files are static, and if the
106 tracer is not adding more data, they will display
107 the same information every time they are read.
108
109 trace_options:
110
111 This file lets the user control the amount of data
112 that is displayed in one of the above output
113 files.
114
115 tracing_max_latency:
116
117 Some of the tracers record the max latency.
118 For example, the time interrupts are disabled.
119 This time is saved in this file. The max trace
120 will also be stored, and displayed by either
121 "trace" or "latency_trace". A new max trace will
122 only be recorded if the latency is greater than
123 the value in this file. (in microseconds)
124
125 buffer_size_kb:
126
127 This sets or displays the number of kilobytes each CPU
128 buffer can hold. The tracer buffers are the same size
129 for each CPU. The displayed number is the size of the
130 CPU buffer and not total size of all buffers. The
131 trace buffers are allocated in pages (blocks of memory
132 that the kernel uses for allocation, usually 4 KB in size).
133 If the last page allocated has room for more bytes
134 than requested, the rest of the page will be used,
135 making the actual allocation bigger than requested.
136 ( Note, the size may not be a multiple of the page size
137 due to buffer managment overhead. )
138
139 This can only be updated when the current_tracer
140 is set to "nop".
141
142 tracing_cpumask:
143
144 This is a mask that lets the user only trace
145 on specified CPUS. The format is a hex string
146 representing the CPUS.
147
148 set_ftrace_filter:
149
150 When dynamic ftrace is configured in (see the
151 section below "dynamic ftrace"), the code is dynamically
152 modified (code text rewrite) to disable calling of the
153 function profiler (mcount). This lets tracing be configured
154 in with practically no overhead in performance. This also
155 has a side effect of enabling or disabling specific functions
156 to be traced. Echoing names of functions into this file
157 will limit the trace to only those functions.
158
159 set_ftrace_notrace:
160
161 This has an effect opposite to that of
162 set_ftrace_filter. Any function that is added here will not
163 be traced. If a function exists in both set_ftrace_filter
164 and set_ftrace_notrace, the function will _not_ be traced.
165
166 set_ftrace_pid:
167
168 Have the function tracer only trace a single thread.
169
170 set_graph_function:
171
172 Set a "trigger" function where tracing should start
173 with the function graph tracer (See the section
174 "dynamic ftrace" for more details).
175
176 available_filter_functions:
177
178 This lists the functions that ftrace
179 has processed and can trace. These are the function
180 names that you can pass to "set_ftrace_filter" or
181 "set_ftrace_notrace". (See the section "dynamic ftrace"
182 below for more details.)
183
184
185The Tracers
186-----------
187
188Here is the list of current tracers that may be configured.
189
190 "function"
191
192 Function call tracer to trace all kernel functions.
193
194 "function_graph"
195
196 Similar to the function tracer except that the
197 function tracer probes the functions on their entry
198 whereas the function graph tracer traces on both entry
199 and exit of the functions. It then provides the ability
200 to draw a graph of function calls similar to C code
201 source.
202
203 "sched_switch"
204
205 Traces the context switches and wakeups between tasks.
206
207 "irqsoff"
208
209 Traces the areas that disable interrupts and saves
210 the trace with the longest max latency.
211 See tracing_max_latency. When a new max is recorded,
212 it replaces the old trace. It is best to view this
213 trace via the latency_trace file.
214
215 "preemptoff"
216
217 Similar to irqsoff but traces and records the amount of
218 time for which preemption is disabled.
219
220 "preemptirqsoff"
221
222 Similar to irqsoff and preemptoff, but traces and
223 records the largest time for which irqs and/or preemption
224 is disabled.
225
226 "wakeup"
227
228 Traces and records the max latency that it takes for
229 the highest priority task to get scheduled after
230 it has been woken up.
231
232 "hw-branch-tracer"
233
234 Uses the BTS CPU feature on x86 CPUs to traces all
235 branches executed.
236
237 "nop"
238
239 This is the "trace nothing" tracer. To remove all
240 tracers from tracing simply echo "nop" into
241 current_tracer.
242
243
244Examples of using the tracer
245----------------------------
246
247Here are typical examples of using the tracers when controlling
248them only with the debugfs interface (without using any
249user-land utilities).
250
251Output format:
252--------------
253
254Here is an example of the output format of the file "trace"
255
256 --------
257# tracer: function
258#
259# TASK-PID CPU# TIMESTAMP FUNCTION
260# | | | | |
261 bash-4251 [01] 10152.583854: path_put <-path_walk
262 bash-4251 [01] 10152.583855: dput <-path_put
263 bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput
264 --------
265
266A header is printed with the tracer name that is represented by
267the trace. In this case the tracer is "function". Then a header
268showing the format. Task name "bash", the task PID "4251", the
269CPU that it was running on "01", the timestamp in <secs>.<usecs>
270format, the function name that was traced "path_put" and the
271parent function that called this function "path_walk". The
272timestamp is the time at which the function was entered.
273
274The sched_switch tracer also includes tracing of task wakeups
275and context switches.
276
277 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
278 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
279 ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
280 events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
281 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
282 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
283
284Wake ups are represented by a "+" and the context switches are
285shown as "==>". The format is:
286
287 Context switches:
288
289 Previous task Next Task
290
291 <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
292
293 Wake ups:
294
295 Current task Task waking up
296
297 <pid>:<prio>:<state> + <pid>:<prio>:<state>
298
299The prio is the internal kernel priority, which is the inverse
300of the priority that is usually displayed by user-space tools.
301Zero represents the highest priority (99). Prio 100 starts the
302"nice" priorities with 100 being equal to nice -20 and 139 being
303nice 19. The prio "140" is reserved for the idle task which is
304the lowest priority thread (pid 0).
305
306
307Latency trace format
308--------------------
309
310For traces that display latency times, the latency_trace file
311gives somewhat more information to see why a latency happened.
312Here is a typical trace.
313
314# tracer: irqsoff
315#
316irqsoff latency trace v1.1.5 on 2.6.26-rc8
317--------------------------------------------------------------------
318 latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
319 -----------------
320 | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0)
321 -----------------
322 => started at: apic_timer_interrupt
323 => ended at: do_softirq
324
325# _------=> CPU#
326# / _-----=> irqs-off
327# | / _----=> need-resched
328# || / _---=> hardirq/softirq
329# ||| / _--=> preempt-depth
330# |||| /
331# ||||| delay
332# cmd pid ||||| time | caller
333# \ / ||||| \ | /
334 <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt)
335 <idle>-0 0d.s. 97us : __do_softirq (do_softirq)
336 <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq)
337
338
339This shows that the current tracer is "irqsoff" tracing the time
340for which interrupts were disabled. It gives the trace version
341and the version of the kernel upon which this was executed on
342(2.6.26-rc8). Then it displays the max latency in microsecs (97
343us). The number of trace entries displayed and the total number
344recorded (both are three: #3/3). The type of preemption that was
345used (PREEMPT). VP, KP, SP, and HP are always zero and are
346reserved for later use. #P is the number of online CPUS (#P:2).
347
348The task is the process that was running when the latency
349occurred. (swapper pid: 0).
350
351The start and stop (the functions in which the interrupts were
352disabled and enabled respectively) that caused the latencies:
353
354 apic_timer_interrupt is where the interrupts were disabled.
355 do_softirq is where they were enabled again.
356
357The next lines after the header are the trace itself. The header
358explains which is which.
359
360 cmd: The name of the process in the trace.
361
362 pid: The PID of that process.
363
364 CPU#: The CPU which the process was running on.
365
366 irqs-off: 'd' interrupts are disabled. '.' otherwise.
367 Note: If the architecture does not support a way to
368 read the irq flags variable, an 'X' will always
369 be printed here.
370
371 need-resched: 'N' task need_resched is set, '.' otherwise.
372
373 hardirq/softirq:
374 'H' - hard irq occurred inside a softirq.
375 'h' - hard irq is running
376 's' - soft irq is running
377 '.' - normal context.
378
379 preempt-depth: The level of preempt_disabled
380
381The above is mostly meaningful for kernel developers.
382
383 time: This differs from the trace file output. The trace file output
384 includes an absolute timestamp. The timestamp used by the
385 latency_trace file is relative to the start of the trace.
386
387 delay: This is just to help catch your eye a bit better. And
388 needs to be fixed to be only relative to the same CPU.
389 The marks are determined by the difference between this
390 current trace and the next trace.
391 '!' - greater than preempt_mark_thresh (default 100)
392 '+' - greater than 1 microsecond
393 ' ' - less than or equal to 1 microsecond.
394
395 The rest is the same as the 'trace' file.
396
397
398trace_options
399-------------
400
401The trace_options file is used to control what gets printed in
402the trace output. To see what is available, simply cat the file:
403
404 cat trace_options
405 print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \
406 noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj
407
408To disable one of the options, echo in the option prepended with
409"no".
410
411 echo noprint-parent > trace_options
412
413To enable an option, leave off the "no".
414
415 echo sym-offset > trace_options
416
417Here are the available options:
418
419 print-parent - On function traces, display the calling (parent)
420 function as well as the function being traced.
421
422 print-parent:
423 bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul
424
425 noprint-parent:
426 bash-4000 [01] 1477.606694: simple_strtoul
427
428
429 sym-offset - Display not only the function name, but also the
430 offset in the function. For example, instead of
431 seeing just "ktime_get", you will see
432 "ktime_get+0xb/0x20".
433
434 sym-offset:
435 bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0
436
437 sym-addr - this will also display the function address as well
438 as the function name.
439
440 sym-addr:
441 bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
442
443 verbose - This deals with the latency_trace file.
444
445 bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
446 (+0.000ms): simple_strtoul (strict_strtoul)
447
448 raw - This will display raw numbers. This option is best for
449 use with user applications that can translate the raw
450 numbers better than having it done in the kernel.
451
452 hex - Similar to raw, but the numbers will be in a hexadecimal
453 format.
454
455 bin - This will print out the formats in raw binary.
456
457 block - TBD (needs update)
458
459 stacktrace - This is one of the options that changes the trace
460 itself. When a trace is recorded, so is the stack
461 of functions. This allows for back traces of
462 trace sites.
463
464 userstacktrace - This option changes the trace. It records a
465 stacktrace of the current userspace thread.
466
467 sym-userobj - when user stacktrace are enabled, look up which
468 object the address belongs to, and print a
469 relative address. This is especially useful when
470 ASLR is on, otherwise you don't get a chance to
471 resolve the address to object/file/line after
472 the app is no longer running
473
474 The lookup is performed when you read
475 trace,trace_pipe,latency_trace. Example:
476
477 a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
478x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
479
480 sched-tree - trace all tasks that are on the runqueue, at
481 every scheduling event. Will add overhead if
482 there's a lot of tasks running at once.
483
484
485sched_switch
486------------
487
488This tracer simply records schedule switches. Here is an example
489of how to use it.
490
491 # echo sched_switch > current_tracer
492 # echo 1 > tracing_enabled
493 # sleep 1
494 # echo 0 > tracing_enabled
495 # cat trace
496
497# tracer: sched_switch
498#
499# TASK-PID CPU# TIMESTAMP FUNCTION
500# | | | | |
501 bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
502 bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
503 sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
504 bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
505 bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
506 sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
507 bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
508 bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
509 <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
510 <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
511 ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
512 <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
513 <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
514 ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
515 sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
516 [...]
517
518
519As we have discussed previously about this format, the header
520shows the name of the trace and points to the options. The
521"FUNCTION" is a misnomer since here it represents the wake ups
522and context switches.
523
524The sched_switch file only lists the wake ups (represented with
525'+') and context switches ('==>') with the previous task or
526current task first followed by the next task or task waking up.
527The format for both of these is PID:KERNEL-PRIO:TASK-STATE.
528Remember that the KERNEL-PRIO is the inverse of the actual
529priority with zero (0) being the highest priority and the nice
530values starting at 100 (nice -20). Below is a quick chart to map
531the kernel priority to user land priorities.
532
533 Kernel Space User Space
534 ===============================================================
535 0(high) to 98(low) user RT priority 99(high) to 1(low)
536 with SCHED_RR or SCHED_FIFO
537 ---------------------------------------------------------------
538 99 sched_priority is not used in scheduling
539 decisions(it must be specified as 0)
540 ---------------------------------------------------------------
541 100(high) to 139(low) user nice -20(high) to 19(low)
542 ---------------------------------------------------------------
543 140 idle task priority
544 ---------------------------------------------------------------
545
546The task states are:
547
548 R - running : wants to run, may not actually be running
549 S - sleep : process is waiting to be woken up (handles signals)
550 D - disk sleep (uninterruptible sleep) : process must be woken up
551 (ignores signals)
552 T - stopped : process suspended
553 t - traced : process is being traced (with something like gdb)
554 Z - zombie : process waiting to be cleaned up
555 X - unknown
556
557
558ftrace_enabled
559--------------
560
561The following tracers (listed below) give different output
562depending on whether or not the sysctl ftrace_enabled is set. To
563set ftrace_enabled, one can either use the sysctl function or
564set it via the proc file system interface.
565
566 sysctl kernel.ftrace_enabled=1
567
568 or
569
570 echo 1 > /proc/sys/kernel/ftrace_enabled
571
572To disable ftrace_enabled simply replace the '1' with '0' in the
573above commands.
574
575When ftrace_enabled is set the tracers will also record the
576functions that are within the trace. The descriptions of the
577tracers will also show an example with ftrace enabled.
578
579
580irqsoff
581-------
582
583When interrupts are disabled, the CPU can not react to any other
584external event (besides NMIs and SMIs). This prevents the timer
585interrupt from triggering or the mouse interrupt from letting
586the kernel know of a new mouse event. The result is a latency
587with the reaction time.
588
589The irqsoff tracer tracks the time for which interrupts are
590disabled. When a new maximum latency is hit, the tracer saves
591the trace leading up to that latency point so that every time a
592new maximum is reached, the old saved trace is discarded and the
593new trace is saved.
594
595To reset the maximum, echo 0 into tracing_max_latency. Here is
596an example:
597
598 # echo irqsoff > current_tracer
599 # echo 0 > tracing_max_latency
600 # echo 1 > tracing_enabled
601 # ls -ltr
602 [...]
603 # echo 0 > tracing_enabled
604 # cat latency_trace
605# tracer: irqsoff
606#
607irqsoff latency trace v1.1.5 on 2.6.26
608--------------------------------------------------------------------
609 latency: 12 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
610 -----------------
611 | task: bash-3730 (uid:0 nice:0 policy:0 rt_prio:0)
612 -----------------
613 => started at: sys_setpgid
614 => ended at: sys_setpgid
615
616# _------=> CPU#
617# / _-----=> irqs-off
618# | / _----=> need-resched
619# || / _---=> hardirq/softirq
620# ||| / _--=> preempt-depth
621# |||| /
622# ||||| delay
623# cmd pid ||||| time | caller
624# \ / ||||| \ | /
625 bash-3730 1d... 0us : _write_lock_irq (sys_setpgid)
626 bash-3730 1d..1 1us+: _write_unlock_irq (sys_setpgid)
627 bash-3730 1d..2 14us : trace_hardirqs_on (sys_setpgid)
628
629
630Here we see that that we had a latency of 12 microsecs (which is
631very good). The _write_lock_irq in sys_setpgid disabled
632interrupts. The difference between the 12 and the displayed
633timestamp 14us occurred because the clock was incremented
634between the time of recording the max latency and the time of
635recording the function that had that latency.
636
637Note the above example had ftrace_enabled not set. If we set the
638ftrace_enabled, we get a much larger output:
639
640# tracer: irqsoff
641#
642irqsoff latency trace v1.1.5 on 2.6.26-rc8
643--------------------------------------------------------------------
644 latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
645 -----------------
646 | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0)
647 -----------------
648 => started at: __alloc_pages_internal
649 => ended at: __alloc_pages_internal
650
651# _------=> CPU#
652# / _-----=> irqs-off
653# | / _----=> need-resched
654# || / _---=> hardirq/softirq
655# ||| / _--=> preempt-depth
656# |||| /
657# ||||| delay
658# cmd pid ||||| time | caller
659# \ / ||||| \ | /
660 ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal)
661 ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist)
662 ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk)
663 ls-4339 0d..1 4us : add_preempt_count (_spin_lock)
664 ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk)
665 ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue)
666 ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest)
667 ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk)
668 ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue)
669 ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest)
670 ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk)
671 ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue)
672[...]
673 ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue)
674 ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest)
675 ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk)
676 ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue)
677 ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest)
678 ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk)
679 ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock)
680 ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal)
681 ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal)
682
683
684
685Here we traced a 50 microsecond latency. But we also see all the
686functions that were called during that time. Note that by
687enabling function tracing, we incur an added overhead. This
688overhead may extend the latency times. But nevertheless, this
689trace has provided some very helpful debugging information.
690
691
692preemptoff
693----------
694
695When preemption is disabled, we may be able to receive
696interrupts but the task cannot be preempted and a higher
697priority task must wait for preemption to be enabled again
698before it can preempt a lower priority task.
699
700The preemptoff tracer traces the places that disable preemption.
701Like the irqsoff tracer, it records the maximum latency for
702which preemption was disabled. The control of preemptoff tracer
703is much like the irqsoff tracer.
704
705 # echo preemptoff > current_tracer
706 # echo 0 > tracing_max_latency
707 # echo 1 > tracing_enabled
708 # ls -ltr
709 [...]
710 # echo 0 > tracing_enabled
711 # cat latency_trace
712# tracer: preemptoff
713#
714preemptoff latency trace v1.1.5 on 2.6.26-rc8
715--------------------------------------------------------------------
716 latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
717 -----------------
718 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
719 -----------------
720 => started at: do_IRQ
721 => ended at: __do_softirq
722
723# _------=> CPU#
724# / _-----=> irqs-off
725# | / _----=> need-resched
726# || / _---=> hardirq/softirq
727# ||| / _--=> preempt-depth
728# |||| /
729# ||||| delay
730# cmd pid ||||| time | caller
731# \ / ||||| \ | /
732 sshd-4261 0d.h. 0us+: irq_enter (do_IRQ)
733 sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq)
734 sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq)
735
736
737This has some more changes. Preemption was disabled when an
738interrupt came in (notice the 'h'), and was enabled while doing
739a softirq. (notice the 's'). But we also see that interrupts
740have been disabled when entering the preempt off section and
741leaving it (the 'd'). We do not know if interrupts were enabled
742in the mean time.
743
744# tracer: preemptoff
745#
746preemptoff latency trace v1.1.5 on 2.6.26-rc8
747--------------------------------------------------------------------
748 latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
749 -----------------
750 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
751 -----------------
752 => started at: remove_wait_queue
753 => ended at: __do_softirq
754
755# _------=> CPU#
756# / _-----=> irqs-off
757# | / _----=> need-resched
758# || / _---=> hardirq/softirq
759# ||| / _--=> preempt-depth
760# |||| /
761# ||||| delay
762# cmd pid ||||| time | caller
763# \ / ||||| \ | /
764 sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue)
765 sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue)
766 sshd-4261 0d..1 2us : do_IRQ (common_interrupt)
767 sshd-4261 0d..1 2us : irq_enter (do_IRQ)
768 sshd-4261 0d..1 2us : idle_cpu (irq_enter)
769 sshd-4261 0d..1 3us : add_preempt_count (irq_enter)
770 sshd-4261 0d.h1 3us : idle_cpu (irq_enter)
771 sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ)
772[...]
773 sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock)
774 sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq)
775 sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq)
776 sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq)
777 sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock)
778 sshd-4261 0d.h1 14us : irq_exit (do_IRQ)
779 sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit)
780 sshd-4261 0d..2 15us : do_softirq (irq_exit)
781 sshd-4261 0d... 15us : __do_softirq (do_softirq)
782 sshd-4261 0d... 16us : __local_bh_disable (__do_softirq)
783 sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable)
784 sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable)
785 sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable)
786 sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable)
787[...]
788 sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable)
789 sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable)
790 sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable)
791 sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable)
792 sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip)
793 sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip)
794 sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable)
795 sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable)
796[...]
797 sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq)
798 sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq)
799
800
801The above is an example of the preemptoff trace with
802ftrace_enabled set. Here we see that interrupts were disabled
803the entire time. The irq_enter code lets us know that we entered
804an interrupt 'h'. Before that, the functions being traced still
805show that it is not in an interrupt, but we can see from the
806functions themselves that this is not the case.
807
808Notice that __do_softirq when called does not have a
809preempt_count. It may seem that we missed a preempt enabling.
810What really happened is that the preempt count is held on the
811thread's stack and we switched to the softirq stack (4K stacks
812in effect). The code does not copy the preempt count, but
813because interrupts are disabled, we do not need to worry about
814it. Having a tracer like this is good for letting people know
815what really happens inside the kernel.
816
817
818preemptirqsoff
819--------------
820
821Knowing the locations that have interrupts disabled or
822preemption disabled for the longest times is helpful. But
823sometimes we would like to know when either preemption and/or
824interrupts are disabled.
825
826Consider the following code:
827
828 local_irq_disable();
829 call_function_with_irqs_off();
830 preempt_disable();
831 call_function_with_irqs_and_preemption_off();
832 local_irq_enable();
833 call_function_with_preemption_off();
834 preempt_enable();
835
836The irqsoff tracer will record the total length of
837call_function_with_irqs_off() and
838call_function_with_irqs_and_preemption_off().
839
840The preemptoff tracer will record the total length of
841call_function_with_irqs_and_preemption_off() and
842call_function_with_preemption_off().
843
844But neither will trace the time that interrupts and/or
845preemption is disabled. This total time is the time that we can
846not schedule. To record this time, use the preemptirqsoff
847tracer.
848
849Again, using this trace is much like the irqsoff and preemptoff
850tracers.
851
852 # echo preemptirqsoff > current_tracer
853 # echo 0 > tracing_max_latency
854 # echo 1 > tracing_enabled
855 # ls -ltr
856 [...]
857 # echo 0 > tracing_enabled
858 # cat latency_trace
859# tracer: preemptirqsoff
860#
861preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
862--------------------------------------------------------------------
863 latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
864 -----------------
865 | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0)
866 -----------------
867 => started at: apic_timer_interrupt
868 => ended at: __do_softirq
869
870# _------=> CPU#
871# / _-----=> irqs-off
872# | / _----=> need-resched
873# || / _---=> hardirq/softirq
874# ||| / _--=> preempt-depth
875# |||| /
876# ||||| delay
877# cmd pid ||||| time | caller
878# \ / ||||| \ | /
879 ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt)
880 ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq)
881 ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq)
882
883
884
885The trace_hardirqs_off_thunk is called from assembly on x86 when
886interrupts are disabled in the assembly code. Without the
887function tracing, we do not know if interrupts were enabled
888within the preemption points. We do see that it started with
889preemption enabled.
890
891Here is a trace with ftrace_enabled set:
892
893
894# tracer: preemptirqsoff
895#
896preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8
897--------------------------------------------------------------------
898 latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
899 -----------------
900 | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0)
901 -----------------
902 => started at: write_chan
903 => ended at: __do_softirq
904
905# _------=> CPU#
906# / _-----=> irqs-off
907# | / _----=> need-resched
908# || / _---=> hardirq/softirq
909# ||| / _--=> preempt-depth
910# |||| /
911# ||||| delay
912# cmd pid ||||| time | caller
913# \ / ||||| \ | /
914 ls-4473 0.N.. 0us : preempt_schedule (write_chan)
915 ls-4473 0dN.1 1us : _spin_lock (schedule)
916 ls-4473 0dN.1 2us : add_preempt_count (_spin_lock)
917 ls-4473 0d..2 2us : put_prev_task_fair (schedule)
918[...]
919 ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts)
920 ls-4473 0d..2 13us : __switch_to (schedule)
921 sshd-4261 0d..2 14us : finish_task_switch (schedule)
922 sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch)
923 sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave)
924 sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set)
925 sshd-4261 0d..2 16us : do_IRQ (common_interrupt)
926 sshd-4261 0d..2 17us : irq_enter (do_IRQ)
927 sshd-4261 0d..2 17us : idle_cpu (irq_enter)
928 sshd-4261 0d..2 18us : add_preempt_count (irq_enter)
929 sshd-4261 0d.h2 18us : idle_cpu (irq_enter)
930 sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ)
931 sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq)
932 sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock)
933 sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq)
934 sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock)
935[...]
936 sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq)
937 sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock)
938 sshd-4261 0d.h2 29us : irq_exit (do_IRQ)
939 sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit)
940 sshd-4261 0d..3 30us : do_softirq (irq_exit)
941 sshd-4261 0d... 30us : __do_softirq (do_softirq)
942 sshd-4261 0d... 31us : __local_bh_disable (__do_softirq)
943 sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable)
944 sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable)
945[...]
946 sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip)
947 sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip)
948 sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt)
949 sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt)
950 sshd-4261 0d.s3 45us : idle_cpu (irq_enter)
951 sshd-4261 0d.s3 46us : add_preempt_count (irq_enter)
952 sshd-4261 0d.H3 46us : idle_cpu (irq_enter)
953 sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt)
954 sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt)
955[...]
956 sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt)
957 sshd-4261 0d.H3 82us : ktime_get (tick_program_event)
958 sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get)
959 sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts)
960 sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts)
961 sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event)
962 sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event)
963 sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt)
964 sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit)
965 sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit)
966 sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable)
967[...]
968 sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action)
969 sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq)
970 sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq)
971 sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq)
972 sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable)
973 sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq)
974 sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq)
975
976
977This is a very interesting trace. It started with the preemption
978of the ls task. We see that the task had the "need_resched" bit
979set via the 'N' in the trace. Interrupts were disabled before
980the spin_lock at the beginning of the trace. We see that a
981schedule took place to run sshd. When the interrupts were
982enabled, we took an interrupt. On return from the interrupt
983handler, the softirq ran. We took another interrupt while
984running the softirq as we see from the capital 'H'.
985
986
987wakeup
988------
989
990In a Real-Time environment it is very important to know the
991wakeup time it takes for the highest priority task that is woken
992up to the time that it executes. This is also known as "schedule
993latency". I stress the point that this is about RT tasks. It is
994also important to know the scheduling latency of non-RT tasks,
995but the average schedule latency is better for non-RT tasks.
996Tools like LatencyTop are more appropriate for such
997measurements.
998
999Real-Time environments are interested in the worst case latency.
1000That is the longest latency it takes for something to happen,
1001and not the average. We can have a very fast scheduler that may
1002only have a large latency once in a while, but that would not
1003work well with Real-Time tasks. The wakeup tracer was designed
1004to record the worst case wakeups of RT tasks. Non-RT tasks are
1005not recorded because the tracer only records one worst case and
1006tracing non-RT tasks that are unpredictable will overwrite the
1007worst case latency of RT tasks.
1008
1009Since this tracer only deals with RT tasks, we will run this
1010slightly differently than we did with the previous tracers.
1011Instead of performing an 'ls', we will run 'sleep 1' under
1012'chrt' which changes the priority of the task.
1013
1014 # echo wakeup > current_tracer
1015 # echo 0 > tracing_max_latency
1016 # echo 1 > tracing_enabled
1017 # chrt -f 5 sleep 1
1018 # echo 0 > tracing_enabled
1019 # cat latency_trace
1020# tracer: wakeup
1021#
1022wakeup latency trace v1.1.5 on 2.6.26-rc8
1023--------------------------------------------------------------------
1024 latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
1025 -----------------
1026 | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5)
1027 -----------------
1028
1029# _------=> CPU#
1030# / _-----=> irqs-off
1031# | / _----=> need-resched
1032# || / _---=> hardirq/softirq
1033# ||| / _--=> preempt-depth
1034# |||| /
1035# ||||| delay
1036# cmd pid ||||| time | caller
1037# \ / ||||| \ | /
1038 <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process)
1039 <idle>-0 1d..4 4us : schedule (cpu_idle)
1040
1041
1042Running this on an idle system, we see that it only took 4
1043microseconds to perform the task switch. Note, since the trace
1044marker in the schedule is before the actual "switch", we stop
1045the tracing when the recorded task is about to schedule in. This
1046may change if we add a new marker at the end of the scheduler.
1047
1048Notice that the recorded task is 'sleep' with the PID of 4901
1049and it has an rt_prio of 5. This priority is user-space priority
1050and not the internal kernel priority. The policy is 1 for
1051SCHED_FIFO and 2 for SCHED_RR.
1052
1053Doing the same with chrt -r 5 and ftrace_enabled set.
1054
1055# tracer: wakeup
1056#
1057wakeup latency trace v1.1.5 on 2.6.26-rc8
1058--------------------------------------------------------------------
1059 latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2)
1060 -----------------
1061 | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5)
1062 -----------------
1063
1064# _------=> CPU#
1065# / _-----=> irqs-off
1066# | / _----=> need-resched
1067# || / _---=> hardirq/softirq
1068# ||| / _--=> preempt-depth
1069# |||| /
1070# ||||| delay
1071# cmd pid ||||| time | caller
1072# \ / ||||| \ | /
1073ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process)
1074ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb)
1075ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up)
1076ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup)
1077ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr)
1078ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup)
1079ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up)
1080ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up)
1081[...]
1082ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt)
1083ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit)
1084ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit)
1085ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq)
1086[...]
1087ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks)
1088ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq)
1089ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable)
1090ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd)
1091ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd)
1092ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched)
1093ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched)
1094ksoftirq-7 1.N.2 33us : schedule (__cond_resched)
1095ksoftirq-7 1.N.2 33us : add_preempt_count (schedule)
1096ksoftirq-7 1.N.3 34us : hrtick_clear (schedule)
1097ksoftirq-7 1dN.3 35us : _spin_lock (schedule)
1098ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock)
1099ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule)
1100ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair)
1101[...]
1102ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline)
1103ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock)
1104ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline)
1105ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock)
1106ksoftirq-7 1d..4 50us : schedule (__cond_resched)
1107
1108The interrupt went off while running ksoftirqd. This task runs
1109at SCHED_OTHER. Why did not we see the 'N' set early? This may
1110be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K
1111stacks configured, the interrupt and softirq run with their own
1112stack. Some information is held on the top of the task's stack
1113(need_resched and preempt_count are both stored there). The
1114setting of the NEED_RESCHED bit is done directly to the task's
1115stack, but the reading of the NEED_RESCHED is done by looking at
1116the current stack, which in this case is the stack for the hard
1117interrupt. This hides the fact that NEED_RESCHED has been set.
1118We do not see the 'N' until we switch back to the task's
1119assigned stack.
1120
1121function
1122--------
1123
1124This tracer is the function tracer. Enabling the function tracer
1125can be done from the debug file system. Make sure the
1126ftrace_enabled is set; otherwise this tracer is a nop.
1127
1128 # sysctl kernel.ftrace_enabled=1
1129 # echo function > current_tracer
1130 # echo 1 > tracing_enabled
1131 # usleep 1
1132 # echo 0 > tracing_enabled
1133 # cat trace
1134# tracer: function
1135#
1136# TASK-PID CPU# TIMESTAMP FUNCTION
1137# | | | | |
1138 bash-4003 [00] 123.638713: finish_task_switch <-schedule
1139 bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch
1140 bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq
1141 bash-4003 [00] 123.638715: hrtick_set <-schedule
1142 bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set
1143 bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave
1144 bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set
1145 bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore
1146 bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set
1147 bash-4003 [00] 123.638718: sub_preempt_count <-schedule
1148 bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule
1149 bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run
1150 bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion
1151 bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common
1152 bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq
1153[...]
1154
1155
1156Note: function tracer uses ring buffers to store the above
1157entries. The newest data may overwrite the oldest data.
1158Sometimes using echo to stop the trace is not sufficient because
1159the tracing could have overwritten the data that you wanted to
1160record. For this reason, it is sometimes better to disable
1161tracing directly from a program. This allows you to stop the
1162tracing at the point that you hit the part that you are
1163interested in. To disable the tracing directly from a C program,
1164something like following code snippet can be used:
1165
1166int trace_fd;
1167[...]
1168int main(int argc, char *argv[]) {
1169 [...]
1170 trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY);
1171 [...]
1172 if (condition_hit()) {
1173 write(trace_fd, "0", 1);
1174 }
1175 [...]
1176}
1177
1178
1179Single thread tracing
1180---------------------
1181
1182By writing into set_ftrace_pid you can trace a
1183single thread. For example:
1184
1185# cat set_ftrace_pid
1186no pid
1187# echo 3111 > set_ftrace_pid
1188# cat set_ftrace_pid
11893111
1190# echo function > current_tracer
1191# cat trace | head
1192 # tracer: function
1193 #
1194 # TASK-PID CPU# TIMESTAMP FUNCTION
1195 # | | | | |
1196 yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
1197 yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
1198 yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
1199 yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
1200 yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
1201 yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
1202# echo -1 > set_ftrace_pid
1203# cat trace |head
1204 # tracer: function
1205 #
1206 # TASK-PID CPU# TIMESTAMP FUNCTION
1207 # | | | | |
1208 ##### CPU 3 buffer started ####
1209 yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
1210 yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
1211 yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
1212 yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
1213 yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
1214
1215If you want to trace a function when executing, you could use
1216something like this simple program:
1217
1218#include <stdio.h>
1219#include <stdlib.h>
1220#include <sys/types.h>
1221#include <sys/stat.h>
1222#include <fcntl.h>
1223#include <unistd.h>
1224
1225#define _STR(x) #x
1226#define STR(x) _STR(x)
1227#define MAX_PATH 256
1228
1229const char *find_debugfs(void)
1230{
1231 static char debugfs[MAX_PATH+1];
1232 static int debugfs_found;
1233 char type[100];
1234 FILE *fp;
1235
1236 if (debugfs_found)
1237 return debugfs;
1238
1239 if ((fp = fopen("/proc/mounts","r")) == NULL) {
1240 perror("/proc/mounts");
1241 return NULL;
1242 }
1243
1244 while (fscanf(fp, "%*s %"
1245 STR(MAX_PATH)
1246 "s %99s %*s %*d %*d\n",
1247 debugfs, type) == 2) {
1248 if (strcmp(type, "debugfs") == 0)
1249 break;
1250 }
1251 fclose(fp);
1252
1253 if (strcmp(type, "debugfs") != 0) {
1254 fprintf(stderr, "debugfs not mounted");
1255 return NULL;
1256 }
1257
1258 debugfs_found = 1;
1259
1260 return debugfs;
1261}
1262
1263const char *tracing_file(const char *file_name)
1264{
1265 static char trace_file[MAX_PATH+1];
1266 snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name);
1267 return trace_file;
1268}
1269
1270int main (int argc, char **argv)
1271{
1272 if (argc < 1)
1273 exit(-1);
1274
1275 if (fork() > 0) {
1276 int fd, ffd;
1277 char line[64];
1278 int s;
1279
1280 ffd = open(tracing_file("current_tracer"), O_WRONLY);
1281 if (ffd < 0)
1282 exit(-1);
1283 write(ffd, "nop", 3);
1284
1285 fd = open(tracing_file("set_ftrace_pid"), O_WRONLY);
1286 s = sprintf(line, "%d\n", getpid());
1287 write(fd, line, s);
1288
1289 write(ffd, "function", 8);
1290
1291 close(fd);
1292 close(ffd);
1293
1294 execvp(argv[1], argv+1);
1295 }
1296
1297 return 0;
1298}
1299
1300
1301hw-branch-tracer (x86 only)
1302---------------------------
1303
1304This tracer uses the x86 last branch tracing hardware feature to
1305collect a branch trace on all cpus with relatively low overhead.
1306
1307The tracer uses a fixed-size circular buffer per cpu and only
1308traces ring 0 branches. The trace file dumps that buffer in the
1309following format:
1310
1311# tracer: hw-branch-tracer
1312#
1313# CPU# TO <- FROM
1314 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
1315 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
1316 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
1317 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
1318 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
1319 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
1320
1321
1322The tracer may be used to dump the trace for the oops'ing cpu on
1323a kernel oops into the system log. To enable this,
1324ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
1325can either use the sysctl function or set it via the proc system
1326interface.
1327
1328 sysctl kernel.ftrace_dump_on_oops=1
1329
1330or
1331
1332 echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
1333
1334
1335Here's an example of such a dump after a null pointer
1336dereference in a kernel module:
1337
1338[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
1339[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
1340[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
1341[57848.106019] Oops: 0002 [#1] SMP
1342[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
1343[57848.106019] Dumping ftrace buffer:
1344[57848.106019] ---------------------------------
1345[...]
1346[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
1347[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
1348[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
1349[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
1350[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
1351[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
1352[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
1353[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
1354[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
1355[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
1356[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
1357[...]
1358[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
1359[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
1360[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
1361[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
1362[...]
1363[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
1364[57848.106019] ---------------------------------
1365[57848.106019] CPU 0
1366[57848.106019] Modules linked in: oops
1367[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
1368[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
1369[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
1370[...]
1371
1372
1373function graph tracer
1374---------------------------
1375
1376This tracer is similar to the function tracer except that it
1377probes a function on its entry and its exit. This is done by
1378using a dynamically allocated stack of return addresses in each
1379task_struct. On function entry the tracer overwrites the return
1380address of each function traced to set a custom probe. Thus the
1381original return address is stored on the stack of return address
1382in the task_struct.
1383
1384Probing on both ends of a function leads to special features
1385such as:
1386
1387- measure of a function's time execution
1388- having a reliable call stack to draw function calls graph
1389
1390This tracer is useful in several situations:
1391
1392- you want to find the reason of a strange kernel behavior and
1393 need to see what happens in detail on any areas (or specific
1394 ones).
1395
1396- you are experiencing weird latencies but it's difficult to
1397 find its origin.
1398
1399- you want to find quickly which path is taken by a specific
1400 function
1401
1402- you just want to peek inside a working kernel and want to see
1403 what happens there.
1404
1405# tracer: function_graph
1406#
1407# CPU DURATION FUNCTION CALLS
1408# | | | | | | |
1409
1410 0) | sys_open() {
1411 0) | do_sys_open() {
1412 0) | getname() {
1413 0) | kmem_cache_alloc() {
1414 0) 1.382 us | __might_sleep();
1415 0) 2.478 us | }
1416 0) | strncpy_from_user() {
1417 0) | might_fault() {
1418 0) 1.389 us | __might_sleep();
1419 0) 2.553 us | }
1420 0) 3.807 us | }
1421 0) 7.876 us | }
1422 0) | alloc_fd() {
1423 0) 0.668 us | _spin_lock();
1424 0) 0.570 us | expand_files();
1425 0) 0.586 us | _spin_unlock();
1426
1427
1428There are several columns that can be dynamically
1429enabled/disabled. You can use every combination of options you
1430want, depending on your needs.
1431
1432- The cpu number on which the function executed is default
1433 enabled. It is sometimes better to only trace one cpu (see
1434 tracing_cpu_mask file) or you might sometimes see unordered
1435 function calls while cpu tracing switch.
1436
1437 hide: echo nofuncgraph-cpu > trace_options
1438 show: echo funcgraph-cpu > trace_options
1439
1440- The duration (function's time of execution) is displayed on
1441 the closing bracket line of a function or on the same line
1442 than the current function in case of a leaf one. It is default
1443 enabled.
1444
1445 hide: echo nofuncgraph-duration > trace_options
1446 show: echo funcgraph-duration > trace_options
1447
1448- The overhead field precedes the duration field in case of
1449 reached duration thresholds.
1450
1451 hide: echo nofuncgraph-overhead > trace_options
1452 show: echo funcgraph-overhead > trace_options
1453 depends on: funcgraph-duration
1454
1455 ie:
1456
1457 0) | up_write() {
1458 0) 0.646 us | _spin_lock_irqsave();
1459 0) 0.684 us | _spin_unlock_irqrestore();
1460 0) 3.123 us | }
1461 0) 0.548 us | fput();
1462 0) + 58.628 us | }
1463
1464 [...]
1465
1466 0) | putname() {
1467 0) | kmem_cache_free() {
1468 0) 0.518 us | __phys_addr();
1469 0) 1.757 us | }
1470 0) 2.861 us | }
1471 0) ! 115.305 us | }
1472 0) ! 116.402 us | }
1473
1474 + means that the function exceeded 10 usecs.
1475 ! means that the function exceeded 100 usecs.
1476
1477
1478- The task/pid field displays the thread cmdline and pid which
1479 executed the function. It is default disabled.
1480
1481 hide: echo nofuncgraph-proc > trace_options
1482 show: echo funcgraph-proc > trace_options
1483
1484 ie:
1485
1486 # tracer: function_graph
1487 #
1488 # CPU TASK/PID DURATION FUNCTION CALLS
1489 # | | | | | | | | |
1490 0) sh-4802 | | d_free() {
1491 0) sh-4802 | | call_rcu() {
1492 0) sh-4802 | | __call_rcu() {
1493 0) sh-4802 | 0.616 us | rcu_process_gp_end();
1494 0) sh-4802 | 0.586 us | check_for_new_grace_period();
1495 0) sh-4802 | 2.899 us | }
1496 0) sh-4802 | 4.040 us | }
1497 0) sh-4802 | 5.151 us | }
1498 0) sh-4802 | + 49.370 us | }
1499
1500
1501- The absolute time field is an absolute timestamp given by the
1502 system clock since it started. A snapshot of this time is
1503 given on each entry/exit of functions
1504
1505 hide: echo nofuncgraph-abstime > trace_options
1506 show: echo funcgraph-abstime > trace_options
1507
1508 ie:
1509
1510 #
1511 # TIME CPU DURATION FUNCTION CALLS
1512 # | | | | | | | |
1513 360.774522 | 1) 0.541 us | }
1514 360.774522 | 1) 4.663 us | }
1515 360.774523 | 1) 0.541 us | __wake_up_bit();
1516 360.774524 | 1) 6.796 us | }
1517 360.774524 | 1) 7.952 us | }
1518 360.774525 | 1) 9.063 us | }
1519 360.774525 | 1) 0.615 us | journal_mark_dirty();
1520 360.774527 | 1) 0.578 us | __brelse();
1521 360.774528 | 1) | reiserfs_prepare_for_journal() {
1522 360.774528 | 1) | unlock_buffer() {
1523 360.774529 | 1) | wake_up_bit() {
1524 360.774529 | 1) | bit_waitqueue() {
1525 360.774530 | 1) 0.594 us | __phys_addr();
1526
1527
1528You can put some comments on specific functions by using
1529trace_printk() For example, if you want to put a comment inside
1530the __might_sleep() function, you just have to include
1531<linux/ftrace.h> and call trace_printk() inside __might_sleep()
1532
1533trace_printk("I'm a comment!\n")
1534
1535will produce:
1536
1537 1) | __might_sleep() {
1538 1) | /* I'm a comment! */
1539 1) 1.449 us | }
1540
1541
1542You might find other useful features for this tracer in the
1543following "dynamic ftrace" section such as tracing only specific
1544functions or tasks.
1545
1546dynamic ftrace
1547--------------
1548
1549If CONFIG_DYNAMIC_FTRACE is set, the system will run with
1550virtually no overhead when function tracing is disabled. The way
1551this works is the mcount function call (placed at the start of
1552every kernel function, produced by the -pg switch in gcc),
1553starts of pointing to a simple return. (Enabling FTRACE will
1554include the -pg switch in the compiling of the kernel.)
1555
1556At compile time every C file object is run through the
1557recordmcount.pl script (located in the scripts directory). This
1558script will process the C object using objdump to find all the
1559locations in the .text section that call mcount. (Note, only the
1560.text section is processed, since processing other sections like
1561.init.text may cause races due to those sections being freed).
1562
1563A new section called "__mcount_loc" is created that holds
1564references to all the mcount call sites in the .text section.
1565This section is compiled back into the original object. The
1566final linker will add all these references into a single table.
1567
1568On boot up, before SMP is initialized, the dynamic ftrace code
1569scans this table and updates all the locations into nops. It
1570also records the locations, which are added to the
1571available_filter_functions list. Modules are processed as they
1572are loaded and before they are executed. When a module is
1573unloaded, it also removes its functions from the ftrace function
1574list. This is automatic in the module unload code, and the
1575module author does not need to worry about it.
1576
1577When tracing is enabled, kstop_machine is called to prevent
1578races with the CPUS executing code being modified (which can
1579cause the CPU to do undesireable things), and the nops are
1580patched back to calls. But this time, they do not call mcount
1581(which is just a function stub). They now call into the ftrace
1582infrastructure.
1583
1584One special side-effect to the recording of the functions being
1585traced is that we can now selectively choose which functions we
1586wish to trace and which ones we want the mcount calls to remain
1587as nops.
1588
1589Two files are used, one for enabling and one for disabling the
1590tracing of specified functions. They are:
1591
1592 set_ftrace_filter
1593
1594and
1595
1596 set_ftrace_notrace
1597
1598A list of available functions that you can add to these files is
1599listed in:
1600
1601 available_filter_functions
1602
1603 # cat available_filter_functions
1604put_prev_task_idle
1605kmem_cache_create
1606pick_next_task_rt
1607get_online_cpus
1608pick_next_task_fair
1609mutex_lock
1610[...]
1611
1612If I am only interested in sys_nanosleep and hrtimer_interrupt:
1613
1614 # echo sys_nanosleep hrtimer_interrupt \
1615 > set_ftrace_filter
1616 # echo ftrace > current_tracer
1617 # echo 1 > tracing_enabled
1618 # usleep 1
1619 # echo 0 > tracing_enabled
1620 # cat trace
1621# tracer: ftrace
1622#
1623# TASK-PID CPU# TIMESTAMP FUNCTION
1624# | | | | |
1625 usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt
1626 usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call
1627 <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt
1628
1629To see which functions are being traced, you can cat the file:
1630
1631 # cat set_ftrace_filter
1632hrtimer_interrupt
1633sys_nanosleep
1634
1635
1636Perhaps this is not enough. The filters also allow simple wild
1637cards. Only the following are currently available
1638
1639 <match>* - will match functions that begin with <match>
1640 *<match> - will match functions that end with <match>
1641 *<match>* - will match functions that have <match> in it
1642
1643These are the only wild cards which are supported.
1644
1645 <match>*<match> will not work.
1646
1647Note: It is better to use quotes to enclose the wild cards,
1648 otherwise the shell may expand the parameters into names
1649 of files in the local directory.
1650
1651 # echo 'hrtimer_*' > set_ftrace_filter
1652
1653Produces:
1654
1655# tracer: ftrace
1656#
1657# TASK-PID CPU# TIMESTAMP FUNCTION
1658# | | | | |
1659 bash-4003 [00] 1480.611794: hrtimer_init <-copy_process
1660 bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set
1661 bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear
1662 bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel
1663 <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt
1664 <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt
1665 <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt
1666 <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt
1667 <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt
1668
1669
1670Notice that we lost the sys_nanosleep.
1671
1672 # cat set_ftrace_filter
1673hrtimer_run_queues
1674hrtimer_run_pending
1675hrtimer_init
1676hrtimer_cancel
1677hrtimer_try_to_cancel
1678hrtimer_forward
1679hrtimer_start
1680hrtimer_reprogram
1681hrtimer_force_reprogram
1682hrtimer_get_next_event
1683hrtimer_interrupt
1684hrtimer_nanosleep
1685hrtimer_wakeup
1686hrtimer_get_remaining
1687hrtimer_get_res
1688hrtimer_init_sleeper
1689
1690
1691This is because the '>' and '>>' act just like they do in bash.
1692To rewrite the filters, use '>'
1693To append to the filters, use '>>'
1694
1695To clear out a filter so that all functions will be recorded
1696again:
1697
1698 # echo > set_ftrace_filter
1699 # cat set_ftrace_filter
1700 #
1701
1702Again, now we want to append.
1703
1704 # echo sys_nanosleep > set_ftrace_filter
1705 # cat set_ftrace_filter
1706sys_nanosleep
1707 # echo 'hrtimer_*' >> set_ftrace_filter
1708 # cat set_ftrace_filter
1709hrtimer_run_queues
1710hrtimer_run_pending
1711hrtimer_init
1712hrtimer_cancel
1713hrtimer_try_to_cancel
1714hrtimer_forward
1715hrtimer_start
1716hrtimer_reprogram
1717hrtimer_force_reprogram
1718hrtimer_get_next_event
1719hrtimer_interrupt
1720sys_nanosleep
1721hrtimer_nanosleep
1722hrtimer_wakeup
1723hrtimer_get_remaining
1724hrtimer_get_res
1725hrtimer_init_sleeper
1726
1727
1728The set_ftrace_notrace prevents those functions from being
1729traced.
1730
1731 # echo '*preempt*' '*lock*' > set_ftrace_notrace
1732
1733Produces:
1734
1735# tracer: ftrace
1736#
1737# TASK-PID CPU# TIMESTAMP FUNCTION
1738# | | | | |
1739 bash-4043 [01] 115.281644: finish_task_switch <-schedule
1740 bash-4043 [01] 115.281645: hrtick_set <-schedule
1741 bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set
1742 bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run
1743 bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion
1744 bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run
1745 bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop
1746 bash-4043 [01] 115.281648: wake_up_process <-kthread_stop
1747 bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process
1748
1749We can see that there's no more lock or preempt tracing.
1750
1751
1752Dynamic ftrace with the function graph tracer
1753---------------------------------------------
1754
1755Although what has been explained above concerns both the
1756function tracer and the function-graph-tracer, there are some
1757special features only available in the function-graph tracer.
1758
1759If you want to trace only one function and all of its children,
1760you just have to echo its name into set_graph_function:
1761
1762 echo __do_fault > set_graph_function
1763
1764will produce the following "expanded" trace of the __do_fault()
1765function:
1766
1767 0) | __do_fault() {
1768 0) | filemap_fault() {
1769 0) | find_lock_page() {
1770 0) 0.804 us | find_get_page();
1771 0) | __might_sleep() {
1772 0) 1.329 us | }
1773 0) 3.904 us | }
1774 0) 4.979 us | }
1775 0) 0.653 us | _spin_lock();
1776 0) 0.578 us | page_add_file_rmap();
1777 0) 0.525 us | native_set_pte_at();
1778 0) 0.585 us | _spin_unlock();
1779 0) | unlock_page() {
1780 0) 0.541 us | page_waitqueue();
1781 0) 0.639 us | __wake_up_bit();
1782 0) 2.786 us | }
1783 0) + 14.237 us | }
1784 0) | __do_fault() {
1785 0) | filemap_fault() {
1786 0) | find_lock_page() {
1787 0) 0.698 us | find_get_page();
1788 0) | __might_sleep() {
1789 0) 1.412 us | }
1790 0) 3.950 us | }
1791 0) 5.098 us | }
1792 0) 0.631 us | _spin_lock();
1793 0) 0.571 us | page_add_file_rmap();
1794 0) 0.526 us | native_set_pte_at();
1795 0) 0.586 us | _spin_unlock();
1796 0) | unlock_page() {
1797 0) 0.533 us | page_waitqueue();
1798 0) 0.638 us | __wake_up_bit();
1799 0) 2.793 us | }
1800 0) + 14.012 us | }
1801
1802You can also expand several functions at once:
1803
1804 echo sys_open > set_graph_function
1805 echo sys_close >> set_graph_function
1806
1807Now if you want to go back to trace all functions you can clear
1808this special filter via:
1809
1810 echo > set_graph_function
1811
1812
1813trace_pipe
1814----------
1815
1816The trace_pipe outputs the same content as the trace file, but
1817the effect on the tracing is different. Every read from
1818trace_pipe is consumed. This means that subsequent reads will be
1819different. The trace is live.
1820
1821 # echo function > current_tracer
1822 # cat trace_pipe > /tmp/trace.out &
1823[1] 4153
1824 # echo 1 > tracing_enabled
1825 # usleep 1
1826 # echo 0 > tracing_enabled
1827 # cat trace
1828# tracer: function
1829#
1830# TASK-PID CPU# TIMESTAMP FUNCTION
1831# | | | | |
1832
1833 #
1834 # cat /tmp/trace.out
1835 bash-4043 [00] 41.267106: finish_task_switch <-schedule
1836 bash-4043 [00] 41.267106: hrtick_set <-schedule
1837 bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set
1838 bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run
1839 bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion
1840 bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run
1841 bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop
1842 bash-4043 [00] 41.267110: wake_up_process <-kthread_stop
1843 bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process
1844 bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up
1845
1846
1847Note, reading the trace_pipe file will block until more input is
1848added. By changing the tracer, trace_pipe will issue an EOF. We
1849needed to set the function tracer _before_ we "cat" the
1850trace_pipe file.
1851
1852
1853trace entries
1854-------------
1855
1856Having too much or not enough data can be troublesome in
1857diagnosing an issue in the kernel. The file buffer_size_kb is
1858used to modify the size of the internal trace buffers. The
1859number listed is the number of entries that can be recorded per
1860CPU. To know the full size, multiply the number of possible CPUS
1861with the number of entries.
1862
1863 # cat buffer_size_kb
18641408 (units kilobytes)
1865
1866Note, to modify this, you must have tracing completely disabled.
1867To do that, echo "nop" into the current_tracer. If the
1868current_tracer is not set to "nop", an EINVAL error will be
1869returned.
1870
1871 # echo nop > current_tracer
1872 # echo 10000 > buffer_size_kb
1873 # cat buffer_size_kb
187410000 (units kilobytes)
1875
1876The number of pages which will be allocated is limited to a
1877percentage of available memory. Allocating too much will produce
1878an error.
1879
1880 # echo 1000000000000 > buffer_size_kb
1881-bash: echo: write error: Cannot allocate memory
1882 # cat buffer_size_kb
188385
1884
1885-----------
1886
1887More details can be found in the source code, in the
1888kernel/trace/*.c files.
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
new file mode 100644
index 000000000000..6308735e58ca
--- /dev/null
+++ b/Documentation/trace/kmemtrace.txt
@@ -0,0 +1,126 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to the userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocation and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on some arch and analysing the
32data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting is done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected unorderly
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
diff --git a/Documentation/tracers/mmiotrace.txt b/Documentation/trace/mmiotrace.txt
index 5731c67abc55..162effbfbdec 100644
--- a/Documentation/tracers/mmiotrace.txt
+++ b/Documentation/trace/mmiotrace.txt
@@ -32,41 +32,41 @@ is no way to automatically detect if you are losing events due to CPUs racing.
32Usage Quick Reference 32Usage Quick Reference
33--------------------- 33---------------------
34 34
35$ mount -t debugfs debugfs /debug 35$ mount -t debugfs debugfs /sys/kernel/debug
36$ echo mmiotrace > /debug/tracing/current_tracer 36$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
37$ cat /debug/tracing/trace_pipe > mydump.txt & 37$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
38Start X or whatever. 38Start X or whatever.
39$ echo "X is up" > /debug/tracing/trace_marker 39$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
40$ echo nop > /debug/tracing/current_tracer 40$ echo nop > /sys/kernel/debug/tracing/current_tracer
41Check for lost events. 41Check for lost events.
42 42
43 43
44Usage 44Usage
45----- 45-----
46 46
47Make sure debugfs is mounted to /debug. If not, (requires root privileges) 47Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges)
48$ mount -t debugfs debugfs /debug 48$ mount -t debugfs debugfs /sys/kernel/debug
49 49
50Check that the driver you are about to trace is not loaded. 50Check that the driver you are about to trace is not loaded.
51 51
52Activate mmiotrace (requires root privileges): 52Activate mmiotrace (requires root privileges):
53$ echo mmiotrace > /debug/tracing/current_tracer 53$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
54 54
55Start storing the trace: 55Start storing the trace:
56$ cat /debug/tracing/trace_pipe > mydump.txt & 56$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
57The 'cat' process should stay running (sleeping) in the background. 57The 'cat' process should stay running (sleeping) in the background.
58 58
59Load the driver you want to trace and use it. Mmiotrace will only catch MMIO 59Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
60accesses to areas that are ioremapped while mmiotrace is active. 60accesses to areas that are ioremapped while mmiotrace is active.
61 61
62During tracing you can place comments (markers) into the trace by 62During tracing you can place comments (markers) into the trace by
63$ echo "X is up" > /debug/tracing/trace_marker 63$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
64This makes it easier to see which part of the (huge) trace corresponds to 64This makes it easier to see which part of the (huge) trace corresponds to
65which action. It is recommended to place descriptive markers about what you 65which action. It is recommended to place descriptive markers about what you
66do. 66do.
67 67
68Shut down mmiotrace (requires root privileges): 68Shut down mmiotrace (requires root privileges):
69$ echo nop > /debug/tracing/current_tracer 69$ echo nop > /sys/kernel/debug/tracing/current_tracer
70The 'cat' process exits. If it does not, kill it by issuing 'fg' command and 70The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
71pressing ctrl+c. 71pressing ctrl+c.
72 72
@@ -78,10 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
78events were lost, the trace is incomplete. You should enlarge the buffers and 78events were lost, the trace is incomplete. You should enlarge the buffers and
79try again. Buffers are enlarged by first seeing how large the current buffers 79try again. Buffers are enlarged by first seeing how large the current buffers
80are: 80are:
81$ cat /debug/tracing/buffer_size_kb 81$ cat /sys/kernel/debug/tracing/buffer_size_kb
82gives you a number. Approximately double this number and write it back, for 82gives you a number. Approximately double this number and write it back, for
83instance: 83instance:
84$ echo 128000 > /debug/tracing/buffer_size_kb 84$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb
85Then start again from the top. 85Then start again from the top.
86 86
87If you are doing a trace for a driver project, e.g. Nouveau, you should also 87If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 000000000000..cd805e16dc27
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
1The power tracer collects detailed information about C-state and P-state
2transitions, instead of just looking at the high-level "average"
3information.
4
5There is a helper script found in scrips/tracing/power.pl in the kernel
6sources which can be used to parse this information and create a
7Scalable Vector Graphics (SVG) picture from the trace data.
8
9To use this tracer:
10
11 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
12 echo power > /sys/kernel/debug/tracing/current_tracer
13 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
14 sleep 1
15 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
16 cat /sys/kernel/debug/tracing/trace | \
17 perl scripts/tracing/power.pl > out.sv
diff --git a/Documentation/tracepoints.txt b/Documentation/trace/tracepoints.txt
index 6f0a044f5b5e..c0e1ceed75a4 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt
@@ -45,8 +45,8 @@ In include/trace/subsys.h :
45#include <linux/tracepoint.h> 45#include <linux/tracepoint.h>
46 46
47DECLARE_TRACE(subsys_eventname, 47DECLARE_TRACE(subsys_eventname,
48 TPPROTO(int firstarg, struct task_struct *p), 48 TP_PROTO(int firstarg, struct task_struct *p),
49 TPARGS(firstarg, p)); 49 TP_ARGS(firstarg, p));
50 50
51In subsys/file.c (where the tracing statement must be added) : 51In subsys/file.c (where the tracing statement must be added) :
52 52
@@ -66,10 +66,10 @@ Where :
66 - subsys is the name of your subsystem. 66 - subsys is the name of your subsystem.
67 - eventname is the name of the event to trace. 67 - eventname is the name of the event to trace.
68 68
69- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the 69- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the
70 function called by this tracepoint. 70 function called by this tracepoint.
71 71
72- TPARGS(firstarg, p) are the parameters names, same as found in the 72- TP_ARGS(firstarg, p) are the parameters names, same as found in the
73 prototype. 73 prototype.
74 74
75Connecting a function (probe) to a tracepoint is done by providing a 75Connecting a function (probe) to a tracepoint is done by providing a
@@ -103,13 +103,14 @@ used to export the defined tracepoints.
103 103
104* Probe / tracepoint example 104* Probe / tracepoint example
105 105
106See the example provided in samples/tracepoints/src 106See the example provided in samples/tracepoints
107 107
108Compile them with your kernel. 108Compile them with your kernel. They are built during 'make' (not
109'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m.
109 110
110Run, as root : 111Run, as root :
111modprobe tracepoint-example (insmod order is not important) 112modprobe tracepoint-sample (insmod order is not important)
112modprobe tracepoint-probe-example 113modprobe tracepoint-probe-sample
113cat /proc/tracepoint-example (returns an expected error) 114cat /proc/tracepoint-sample (returns an expected error)
114rmmod tracepoint-example tracepoint-probe-example 115rmmod tracepoint-sample tracepoint-probe-sample
115dmesg 116dmesg
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt
index 4c3d62c7843a..c480e9c32dbd 100644
--- a/Documentation/usb/WUSB-Design-overview.txt
+++ b/Documentation/usb/WUSB-Design-overview.txt
@@ -84,7 +84,7 @@ The different logical parts of this driver are:
84 84
85 *UWB*: the Ultra-Wide-Band stack -- manages the radio and 85 *UWB*: the Ultra-Wide-Band stack -- manages the radio and
86 associated spectrum to allow for devices sharing it. Allows to 86 associated spectrum to allow for devices sharing it. Allows to
87 control bandwidth assingment, beaconing, scanning, etc 87 control bandwidth assignment, beaconing, scanning, etc
88 88
89 * 89 *
90 90
@@ -184,7 +184,7 @@ and sends the replies and notifications back to the API
184[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that 184[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
185is chartered, among other things, to keep the tab of how the UWB radio 185is chartered, among other things, to keep the tab of how the UWB radio
186neighborhood looks, creating and destroying devices as they show up or 186neighborhood looks, creating and destroying devices as they show up or
187dissapear. 187disappear.
188 188
189Command execution is very simple: a command block is sent and a event 189Command execution is very simple: a command block is sent and a event
190block or reply is expected back. For sending/receiving command/events, a 190block or reply is expected back. For sending/receiving command/events, a
@@ -333,7 +333,7 @@ read descriptors and move our data.
333 333
334*Device life cycle and keep alives* 334*Device life cycle and keep alives*
335 335
336Everytime there is a succesful transfer to/from a device, we update a 336Every time there is a successful transfer to/from a device, we update a
337per-device activity timestamp. If not, every now and then we check and 337per-device activity timestamp. If not, every now and then we check and
338if the activity timestamp gets old, we ping the device by sending it a 338if the activity timestamp gets old, we ping the device by sending it a
339Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this 339Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
@@ -411,7 +411,7 @@ context (wa_xfer) and submit it. When the xfer is done, our callback is
411called and we assign the status bits and release the xfer resources. 411called and we assign the status bits and release the xfer resources.
412 412
413In dequeue() we are basically cancelling/aborting the transfer. We issue 413In dequeue() we are basically cancelling/aborting the transfer. We issue
414a xfer abort request to the HC, cancell all the URBs we had submitted 414a xfer abort request to the HC, cancel all the URBs we had submitted
415and not yet done and when all that is done, the xfer callback will be 415and not yet done and when all that is done, the xfer callback will be
416called--this will call the URB callback. 416called--this will call the URB callback.
417 417
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt
index 6f24f566955a..fe6a99a32bbd 100644
--- a/Documentation/usb/anchors.txt
+++ b/Documentation/usb/anchors.txt
@@ -27,7 +27,7 @@ Association and disassociation of URBs with anchors
27 27
28An association of URBs to an anchor is made by an explicit 28An association of URBs to an anchor is made by an explicit
29call to usb_anchor_urb(). The association is maintained until 29call to usb_anchor_urb(). The association is maintained until
30an URB is finished by (successfull) completion. Thus disassociation 30an URB is finished by (successful) completion. Thus disassociation
31is automatic. A function is provided to forcibly finish (kill) 31is automatic. A function is provided to forcibly finish (kill)
32all URBs associated with an anchor. 32all URBs associated with an anchor.
33Furthermore, disassociation can be made with usb_unanchor_urb() 33Furthermore, disassociation can be made with usb_unanchor_urb()
@@ -76,4 +76,4 @@ usb_get_from_anchor()
76Returns the oldest anchored URB of an anchor. The URB is unanchored 76Returns the oldest anchored URB of an anchor. The URB is unanchored
77and returned with a reference. As you may mix URBs to several 77and returned with a reference. As you may mix URBs to several
78destinations in one anchor you have no guarantee the chronologically 78destinations in one anchor you have no guarantee the chronologically
79first submitted URB is returned. \ No newline at end of file 79first submitted URB is returned.
diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt
index 7c812411945b..bfb36b34b79e 100644
--- a/Documentation/usb/callbacks.txt
+++ b/Documentation/usb/callbacks.txt
@@ -65,7 +65,7 @@ Accept or decline an interface. If you accept the device return 0,
65otherwise -ENODEV or -ENXIO. Other error codes should be used only if a 65otherwise -ENODEV or -ENXIO. Other error codes should be used only if a
66genuine error occurred during initialisation which prevented a driver 66genuine error occurred during initialisation which prevented a driver
67from accepting a device that would else have been accepted. 67from accepting a device that would else have been accepted.
68You are strongly encouraged to use usbcore'sfacility, 68You are strongly encouraged to use usbcore's facility,
69usb_set_intfdata(), to associate a data structure with an interface, so 69usb_set_intfdata(), to associate a data structure with an interface, so
70that you know which internal state and identity you associate with a 70that you know which internal state and identity you associate with a
71particular interface. The device will not be suspended and you may do IO 71particular interface. The device will not be suspended and you may do IO
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
index 270481906dc8..6c3c625b7f30 100644
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -229,16 +229,26 @@ struct usbmon_packet {
229 int status; /* 28: */ 229 int status; /* 28: */
230 unsigned int length; /* 32: Length of data (submitted or actual) */ 230 unsigned int length; /* 32: Length of data (submitted or actual) */
231 unsigned int len_cap; /* 36: Delivered length */ 231 unsigned int len_cap; /* 36: Delivered length */
232 unsigned char setup[8]; /* 40: Only for Control 'S' */ 232 union { /* 40: */
233}; /* 48 bytes total */ 233 unsigned char setup[SETUP_LEN]; /* Only for Control S-type */
234 struct iso_rec { /* Only for ISO */
235 int error_count;
236 int numdesc;
237 } iso;
238 } s;
239 int interval; /* 48: Only for Interrupt and ISO */
240 int start_frame; /* 52: For ISO */
241 unsigned int xfer_flags; /* 56: copy of URB's transfer_flags */
242 unsigned int ndesc; /* 60: Actual number of ISO descriptors */
243}; /* 64 total length */
234 244
235These events can be received from a character device by reading with read(2), 245These events can be received from a character device by reading with read(2),
236with an ioctl(2), or by accessing the buffer with mmap. 246with an ioctl(2), or by accessing the buffer with mmap. However, read(2)
247only returns first 48 bytes for compatibility reasons.
237 248
238The character device is usually called /dev/usbmonN, where N is the USB bus 249The character device is usually called /dev/usbmonN, where N is the USB bus
239number. Number zero (/dev/usbmon0) is special and means "all buses". 250number. Number zero (/dev/usbmon0) is special and means "all buses".
240However, this feature is not implemented yet. Note that specific naming 251Note that specific naming policy is set by your Linux distribution.
241policy is set by your Linux distribution.
242 252
243If you create /dev/usbmon0 by hand, make sure that it is owned by root 253If you create /dev/usbmon0 by hand, make sure that it is owned by root
244and has mode 0600. Otherwise, unpriviledged users will be able to snoop 254and has mode 0600. Otherwise, unpriviledged users will be able to snoop
@@ -279,9 +289,10 @@ size is out of [unspecified] bounds for this kernel, the call fails with
279This call returns the current size of the buffer in bytes. 289This call returns the current size of the buffer in bytes.
280 290
281 MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg) 291 MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg)
292 MON_IOCX_GETX, defined as _IOW(MON_IOC_MAGIC, 10, struct mon_get_arg)
282 293
283This call waits for events to arrive if none were in the kernel buffer, 294These calls wait for events to arrive if none were in the kernel buffer,
284then returns the first event. Its argument is a pointer to the following 295then return the first event. The argument is a pointer to the following
285structure: 296structure:
286 297
287struct mon_get_arg { 298struct mon_get_arg {
@@ -294,6 +305,8 @@ Before the call, hdr, data, and alloc should be filled. Upon return, the area
294pointed by hdr contains the next event structure, and the data buffer contains 305pointed by hdr contains the next event structure, and the data buffer contains
295the data, if any. The event is removed from the kernel buffer. 306the data, if any. The event is removed from the kernel buffer.
296 307
308The MON_IOCX_GET copies 48 bytes, MON_IOCX_GETX copies 64 bytes.
309
297 MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg) 310 MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg)
298 311
299This ioctl is primarily used when the application accesses the buffer 312This ioctl is primarily used when the application accesses the buffer
diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv
index 0d93fa1ac25e..f11c583295e9 100644
--- a/Documentation/video4linux/CARDLIST.bttv
+++ b/Documentation/video4linux/CARDLIST.bttv
@@ -135,7 +135,7 @@
135134 -> Adlink RTV24 135134 -> Adlink RTV24
136135 -> DViCO FusionHDTV 5 Lite [18ac:d500] 136135 -> DViCO FusionHDTV 5 Lite [18ac:d500]
137136 -> Acorp Y878F [9511:1540] 137136 -> Acorp Y878F [9511:1540]
138137 -> Conceptronic CTVFMi v2 138137 -> Conceptronic CTVFMi v2 [036e:109e]
139138 -> Prolink Pixelview PV-BT878P+ (Rev.2E) 139138 -> Prolink Pixelview PV-BT878P+ (Rev.2E)
140139 -> Prolink PixelView PlayTV MPEG2 PV-M4900 140139 -> Prolink PixelView PlayTV MPEG2 PV-M4900
141140 -> Osprey 440 [0070:ff07] 141140 -> Osprey 440 [0070:ff07]
@@ -154,3 +154,7 @@
154153 -> PHYTEC VD-012 (bt878) 154153 -> PHYTEC VD-012 (bt878)
155154 -> PHYTEC VD-012-X1 (bt878) 155154 -> PHYTEC VD-012-X1 (bt878)
156155 -> PHYTEC VD-012-X2 (bt878) 156155 -> PHYTEC VD-012-X2 (bt878)
157156 -> IVCE-8784 [0000:f050,0001:f050,0002:f050,0003:f050]
158157 -> Geovision GV-800(S) (master) [800a:763d]
159158 -> Geovision GV-800(S) (slave) [800b:763d,800c:763d,800d:763d]
160159 -> ProVideo PV183 [1830:1540,1831:1540,1832:1540,1833:1540,1834:1540,1835:1540,1836:1540,1837:1540]
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885
index 35ea130e9898..450b8f8c389b 100644
--- a/Documentation/video4linux/CARDLIST.cx23885
+++ b/Documentation/video4linux/CARDLIST.cx23885
@@ -12,3 +12,12 @@
12 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78] 12 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78]
13 12 -> Leadtek Winfast PxDVR3200 H [107d:6681] 13 12 -> Leadtek Winfast PxDVR3200 H [107d:6681]
14 13 -> Compro VideoMate E650F [185b:e800] 14 13 -> Compro VideoMate E650F [185b:e800]
15 14 -> TurboSight TBS 6920 [6920:8888]
16 15 -> TeVii S470 [d470:9022]
17 16 -> DVBWorld DVB-S2 2005 [0001:2005]
18 17 -> NetUP Dual DVB-S2 CI [1b55:2a2c]
19 18 -> Hauppauge WinTV-HVR1270 [0070:2211]
20 19 -> Hauppauge WinTV-HVR1275 [0070:2215]
21 20 -> Hauppauge WinTV-HVR1255 [0070:2251]
22 21 -> Hauppauge WinTV-HVR1210 [0070:2291,0070:2295]
23 22 -> Mygica X8506 DMB-TH [14f1:8651]
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88
index 0d08f1edcf6d..89093f531727 100644
--- a/Documentation/video4linux/CARDLIST.cx88
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -77,3 +77,6 @@
77 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200] 77 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200]
78 77 -> TBS 8910 DVB-S [8910:8888] 78 77 -> TBS 8910 DVB-S [8910:8888]
79 78 -> Prof 6200 DVB-S [b022:3022] 79 78 -> Prof 6200 DVB-S [b022:3022]
80 79 -> Terratec Cinergy HT PCI MKII [153b:1177]
81 80 -> Hauppauge WinTV-IR Only [0070:9290]
82 81 -> Leadtek WinFast DTV1800 Hybrid [107d:6654]
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index 75bded8a4aa2..a98a688c11b8 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx
@@ -7,17 +7,17 @@
7 6 -> Terratec Cinergy 200 USB (em2800) 7 6 -> Terratec Cinergy 200 USB (em2800)
8 7 -> Leadtek Winfast USB II (em2800) [0413:6023] 8 7 -> Leadtek Winfast USB II (em2800) [0413:6023]
9 8 -> Kworld USB2800 (em2800) 9 8 -> Kworld USB2800 (em2800)
10 9 -> Pinnacle Dazzle DVC 90/DVC 100 (em2820/em2840) [2304:0207,2304:021a] 10 9 -> Pinnacle Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker (em2820/em2840) [1b80:e302,2304:0207,2304:021a]
11 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500] 11 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500]
12 11 -> Terratec Hybrid XS (em2880) [0ccd:0042] 12 11 -> Terratec Hybrid XS (em2880) [0ccd:0042]
13 12 -> Kworld PVR TV 2800 RF (em2820/em2840) 13 12 -> Kworld PVR TV 2800 RF (em2820/em2840)
14 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] 14 13 -> Terratec Prodigy XS (em2880) [0ccd:0047]
15 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) 15 14 -> SIIG AVTuner-PVR / Pixelview Prolink PlayTV USB 2.0 (em2820/em2840)
16 15 -> V-Gear PocketTV (em2800) 16 15 -> V-Gear PocketTV (em2800)
17 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b] 17 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b]
18 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227] 18 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227]
19 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502] 19 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502]
20 19 -> PointNix Intra-Oral Camera (em2860) 20 19 -> EM2860/SAA711X Reference Design (em2860)
21 20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002] 21 20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002]
22 21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801] 22 21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801]
23 22 -> Unknown EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751] 23 22 -> Unknown EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751]
@@ -30,7 +30,6 @@
30 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840) 30 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840)
31 31 -> Usbgear VD204v9 (em2821) 31 31 -> Usbgear VD204v9 (em2821)
32 32 -> Supercomp USB 2.0 TV (em2821) 32 32 -> Supercomp USB 2.0 TV (em2821)
33 33 -> SIIG AVTuner-PVR/Prolink PlayTV USB 2.0 (em2821)
34 34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f] 33 34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f]
35 35 -> Typhoon DVD Maker (em2860) 34 35 -> Typhoon DVD Maker (em2860)
36 36 -> NetGMBH Cam (em2860) 35 36 -> NetGMBH Cam (em2860)
@@ -58,3 +57,11 @@
58 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] 57 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041]
59 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] 58 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f]
60 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840) 59 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840)
60 62 -> Gadmei TVR200 (em2820/em2840)
61 63 -> Kaiomy TVnPC U2 (em2860) [eb1a:e303]
62 64 -> Easy Cap Capture DC-60 (em2860)
63 65 -> IO-DATA GV-MVP/SZ (em2820/em2840) [04bb:0515]
64 66 -> Empire dual TV (em2880)
65 67 -> Terratec Grabby (em2860) [0ccd:0096]
66 68 -> Terratec AV350 (em2860) [0ccd:0084]
67 69 -> KWorld ATSC 315U HDTV TV Box (em2882) [eb1a:a313]
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index b8d470596b0c..15562427e8a9 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -124,10 +124,10 @@
124123 -> Beholder BeholdTV 407 [0000:4070] 124123 -> Beholder BeholdTV 407 [0000:4070]
125124 -> Beholder BeholdTV 407 FM [0000:4071] 125124 -> Beholder BeholdTV 407 FM [0000:4071]
126125 -> Beholder BeholdTV 409 [0000:4090] 126125 -> Beholder BeholdTV 409 [0000:4090]
127126 -> Beholder BeholdTV 505 FM/RDS [0000:5051,0000:505B,5ace:5050] 127126 -> Beholder BeholdTV 505 FM [5ace:5050]
128127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] 128127 -> Beholder BeholdTV 507 FM / BeholdTV 509 FM [5ace:5070,5ace:5090]
129128 -> Beholder BeholdTV Columbus TVFM [0000:5201] 129128 -> Beholder BeholdTV Columbus TVFM [0000:5201]
130129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] 130129 -> Beholder BeholdTV 607 FM [5ace:6070]
131130 -> Beholder BeholdTV M6 [5ace:6190] 131130 -> Beholder BeholdTV M6 [5ace:6190]
132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] 132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022]
133132 -> Genius TVGO AM11MCE 133132 -> Genius TVGO AM11MCE
@@ -143,7 +143,7 @@
143142 -> Beholder BeholdTV H6 [5ace:6290] 143142 -> Beholder BeholdTV H6 [5ace:6290]
144143 -> Beholder BeholdTV M63 [5ace:6191] 144143 -> Beholder BeholdTV M63 [5ace:6191]
145144 -> Beholder BeholdTV M6 Extra [5ace:6193] 145144 -> Beholder BeholdTV M6 Extra [5ace:6193]
146145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636] 146145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636,1461:f736]
147146 -> ASUSTeK P7131 Analog 147146 -> ASUSTeK P7131 Analog
148147 -> Asus Tiger 3in1 [1043:4878] 148147 -> Asus Tiger 3in1 [1043:4878]
149148 -> Encore ENLTV-FM v5.3 [1a7f:2008] 149148 -> Encore ENLTV-FM v5.3 [1a7f:2008]
@@ -153,3 +153,17 @@
153152 -> Asus Tiger Rev:1.00 [1043:4857] 153152 -> Asus Tiger Rev:1.00 [1043:4857]
154153 -> Kworld Plus TV Analog Lite PCI [17de:7128] 154153 -> Kworld Plus TV Analog Lite PCI [17de:7128]
155154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d] 155154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d]
156155 -> Hauppauge WinTV-HVR1120 ATSC/QAM-Hybrid [0070:6706,0070:6708]
157156 -> Hauppauge WinTV-HVR1110r3 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a]
158157 -> Avermedia AVerTV Studio 507UA [1461:a11b]
159158 -> AVerMedia Cardbus TV/Radio (E501R) [1461:b7e9]
160159 -> Beholder BeholdTV 505 RDS [0000:505B]
161160 -> Beholder BeholdTV 507 RDS [0000:5071]
162161 -> Beholder BeholdTV 507 RDS [0000:507B]
163162 -> Beholder BeholdTV 607 FM [5ace:6071]
164163 -> Beholder BeholdTV 609 FM [5ace:6090]
165164 -> Beholder BeholdTV 609 FM [5ace:6091]
166165 -> Beholder BeholdTV 607 RDS [5ace:6072]
167166 -> Beholder BeholdTV 607 RDS [5ace:6073]
168167 -> Beholder BeholdTV 609 RDS [5ace:6092]
169168 -> Beholder BeholdTV 609 RDS [5ace:6093]
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index 691d2f37dc57..be67844074dd 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -76,3 +76,5 @@ tuner=75 - Philips TEA5761 FM Radio
76tuner=76 - Xceive 5000 tuner 76tuner=76 - Xceive 5000 tuner
77tuner=77 - TCL tuner MF02GIP-5N-E 77tuner=77 - TCL tuner MF02GIP-5N-E
78tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner 78tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner
79tuner=79 - Philips PAL/SECAM multi (FM1216 MK5)
80tuner=80 - Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough
diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran
index 295462b2317a..0e89e7676298 100644
--- a/Documentation/video4linux/Zoran
+++ b/Documentation/video4linux/Zoran
@@ -401,8 +401,7 @@ Additional notes for software developers:
401 first set the correct norm. Well, it seems logically correct: TV 401 first set the correct norm. Well, it seems logically correct: TV
402 standard is "more constant" for current country than geometry 402 standard is "more constant" for current country than geometry
403 settings of a variety of TV capture cards which may work in ITU or 403 settings of a variety of TV capture cards which may work in ITU or
404 square pixel format. Remember that users now can lock the norm to 404 square pixel format.
405 avoid any ambiguity.
406-- 405--
407Please note that lavplay/lavrec are also included in the MJPEG-tools 406Please note that lavplay/lavrec are also included in the MJPEG-tools
408(http://mjpeg.sf.net/). 407(http://mjpeg.sf.net/).
diff --git a/Documentation/video4linux/bttv/Insmod-options b/Documentation/video4linux/bttv/Insmod-options
index 5ef75787f83a..bbe3ed667d91 100644
--- a/Documentation/video4linux/bttv/Insmod-options
+++ b/Documentation/video4linux/bttv/Insmod-options
@@ -81,16 +81,6 @@ tuner.o
81 pal=[bdgil] select PAL variant (used for some tuners 81 pal=[bdgil] select PAL variant (used for some tuners
82 only, important for the audio carrier). 82 only, important for the audio carrier).
83 83
84tvmixer.o
85 registers a mixer device for the TV card's volume/bass/treble
86 controls (requires a i2c audio control chip like the msp3400).
87
88 insmod args:
89 debug=1 print some debug info to the syslog.
90 devnr=n allocate device #n (0 == /dev/mixer,
91 1 = /dev/mixer1, ...), default is to
92 use the first free one.
93
94tvaudio.o 84tvaudio.o
95 new, experimental module which is supported to provide a single 85 new, experimental module which is supported to provide a single
96 driver for all simple i2c audio control chips (tda/tea*). 86 driver for all simple i2c audio control chips (tda/tea*).
diff --git a/Documentation/video4linux/bttv/README b/Documentation/video4linux/bttv/README
index 7ca2154c2bf5..3a367cdb664e 100644
--- a/Documentation/video4linux/bttv/README
+++ b/Documentation/video4linux/bttv/README
@@ -63,8 +63,8 @@ If you have some knowledge and spare time, please try to fix this
63yourself (patches very welcome of course...) You know: The linux 63yourself (patches very welcome of course...) You know: The linux
64slogan is "Do it yourself". 64slogan is "Do it yourself".
65 65
66There is a mailing list: video4linux-list@redhat.com. 66There is a mailing list: linux-media@vger.kernel.org
67https://listman.redhat.com/mailman/listinfo/video4linux-list 67http://vger.kernel.org/vger-lists.html#linux-media
68 68
69If you have trouble with some specific TV card, try to ask there 69If you have trouble with some specific TV card, try to ask there
70instead of mailing me directly. The chance that someone with the 70instead of mailing me directly. The chance that someone with the
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
index 914cb7e734a2..4652c0f5da32 100644
--- a/Documentation/video4linux/cx18.txt
+++ b/Documentation/video4linux/cx18.txt
@@ -11,7 +11,7 @@ encoder chip:
112) Some people have problems getting the i2c bus to work. 112) Some people have problems getting the i2c bus to work.
12 The symptom is that the eeprom cannot be read and the card is 12 The symptom is that the eeprom cannot be read and the card is
13 unusable. This is probably fixed, but if you have problems 13 unusable. This is probably fixed, but if you have problems
14 then post to the video4linux or ivtv-users mailinglist. 14 then post to the video4linux or ivtv-users mailing list.
15 15
163) VBI (raw or sliced) has not yet been implemented. 163) VBI (raw or sliced) has not yet been implemented.
17 17
diff --git a/Documentation/video4linux/cx2341x/README.hm12 b/Documentation/video4linux/cx2341x/README.hm12
index 0e213ed095e6..b36148ea0750 100644
--- a/Documentation/video4linux/cx2341x/README.hm12
+++ b/Documentation/video4linux/cx2341x/README.hm12
@@ -32,6 +32,10 @@ Y, U and V planes. This code assumes frames of 720x576 (PAL) pixels.
32The width of a frame is always 720 pixels, regardless of the actual specified 32The width of a frame is always 720 pixels, regardless of the actual specified
33width. 33width.
34 34
35If the height is not a multiple of 32 lines, then the captured video is
36missing macroblocks at the end and is unusable. So the height must be a
37multiple of 32.
38
35-------------------------------------------------------------------------- 39--------------------------------------------------------------------------
36 40
37#include <stdio.h> 41#include <stdio.h>
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 1c58a7630146..2bcf78896e22 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -32,6 +32,7 @@ spca561 041e:403b Creative Webcam Vista (VF0010)
32zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250) 32zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250)
33ov519 041e:4052 Creative Live! VISTA IM 33ov519 041e:4052 Creative Live! VISTA IM
34zc3xx 041e:4053 Creative Live!Cam Video IM 34zc3xx 041e:4053 Creative Live!Cam Video IM
35vc032x 041e:405b Creative Live! Cam Notebook Ultra (VC0130)
35ov519 041e:405f Creative Live! VISTA VF0330 36ov519 041e:405f Creative Live! VISTA VF0330
36ov519 041e:4060 Creative Live! VISTA VF0350 37ov519 041e:4060 Creative Live! VISTA VF0350
37ov519 041e:4061 Creative Live! VISTA VF0400 38ov519 041e:4061 Creative Live! VISTA VF0400
@@ -162,10 +163,11 @@ sunplus 055f:c650 Mustek MDC5500Z
162zc3xx 055f:d003 Mustek WCam300A 163zc3xx 055f:d003 Mustek WCam300A
163zc3xx 055f:d004 Mustek WCam300 AN 164zc3xx 055f:d004 Mustek WCam300 AN
164conex 0572:0041 Creative Notebook cx11646 165conex 0572:0041 Creative Notebook cx11646
165ov519 05a9:0519 OmniVision 166ov519 05a9:0519 OV519 Microphone
166ov519 05a9:0530 OmniVision 167ov519 05a9:0530 OmniVision
167ov519 05a9:4519 OmniVision 168ov519 05a9:4519 Webcam Classic
168ov519 05a9:8519 OmniVision 169ov519 05a9:8519 OmniVision
170ov519 05a9:a518 D-Link DSB-C310 Webcam
169sunplus 05da:1018 Digital Dream Enigma 1.3 171sunplus 05da:1018 Digital Dream Enigma 1.3
170stk014 05e1:0893 Syntek DV4000 172stk014 05e1:0893 Syntek DV4000
171spca561 060b:a001 Maxell Compact Pc PM3 173spca561 060b:a001 Maxell Compact Pc PM3
@@ -177,6 +179,7 @@ spca506 06e1:a190 ADS Instant VCD
177ov534 06f8:3002 Hercules Blog Webcam 179ov534 06f8:3002 Hercules Blog Webcam
178ov534 06f8:3003 Hercules Dualpix HD Weblog 180ov534 06f8:3003 Hercules Dualpix HD Weblog
179sonixj 06f8:3004 Hercules Classic Silver 181sonixj 06f8:3004 Hercules Classic Silver
182sonixj 06f8:3008 Hercules Deluxe Optical Glass
180spca508 0733:0110 ViewQuest VQ110 183spca508 0733:0110 ViewQuest VQ110
181spca508 0130:0130 Clone Digital Webcam 11043 184spca508 0130:0130 Clone Digital Webcam 11043
182spca501 0733:0401 Intel Create and Share 185spca501 0733:0401 Intel Create and Share
@@ -193,6 +196,7 @@ spca500 084d:0003 D-Link DSC-350
193spca500 08ca:0103 Aiptek PocketDV 196spca500 08ca:0103 Aiptek PocketDV
194sunplus 08ca:0104 Aiptek PocketDVII 1.3 197sunplus 08ca:0104 Aiptek PocketDVII 1.3
195sunplus 08ca:0106 Aiptek Pocket DV3100+ 198sunplus 08ca:0106 Aiptek Pocket DV3100+
199mr97310a 08ca:0111 Aiptek PenCam VGA+
196sunplus 08ca:2008 Aiptek Mini PenCam 2 M 200sunplus 08ca:2008 Aiptek Mini PenCam 2 M
197sunplus 08ca:2010 Aiptek PocketCam 3M 201sunplus 08ca:2010 Aiptek PocketCam 3M
198sunplus 08ca:2016 Aiptek PocketCam 2 Mega 202sunplus 08ca:2016 Aiptek PocketCam 2 Mega
@@ -207,6 +211,7 @@ sunplus 08ca:2050 Medion MD 41437
207sunplus 08ca:2060 Aiptek PocketDV5300 211sunplus 08ca:2060 Aiptek PocketDV5300
208tv8532 0923:010f ICM532 cams 212tv8532 0923:010f ICM532 cams
209mars 093a:050f Mars-Semi Pc-Camera 213mars 093a:050f Mars-Semi Pc-Camera
214mr97310a 093a:010f Sakar Digital no. 77379
210pac207 093a:2460 Qtec Webcam 100 215pac207 093a:2460 Qtec Webcam 100
211pac207 093a:2461 HP Webcam 216pac207 093a:2461 HP Webcam
212pac207 093a:2463 Philips SPC 220 NC 217pac207 093a:2463 Philips SPC 220 NC
@@ -215,6 +220,7 @@ pac207 093a:2468 PAC207
215pac207 093a:2470 Genius GF112 220pac207 093a:2470 Genius GF112
216pac207 093a:2471 Genius VideoCam ge111 221pac207 093a:2471 Genius VideoCam ge111
217pac207 093a:2472 Genius VideoCam ge110 222pac207 093a:2472 Genius VideoCam ge110
223pac207 093a:2474 Genius iLook 111
218pac207 093a:2476 Genius e-Messenger 112 224pac207 093a:2476 Genius e-Messenger 112
219pac7311 093a:2600 PAC7311 Typhoon 225pac7311 093a:2600 PAC7311 Typhoon
220pac7311 093a:2601 Philips SPC 610 NC 226pac7311 093a:2601 Philips SPC 610 NC
@@ -262,6 +268,11 @@ sonixj 0c45:60ec SN9C105+MO4000
262sonixj 0c45:60fb Surfer NoName 268sonixj 0c45:60fb Surfer NoName
263sonixj 0c45:60fc LG-LIC300 269sonixj 0c45:60fc LG-LIC300
264sonixj 0c45:60fe Microdia Audio 270sonixj 0c45:60fe Microdia Audio
271sonixj 0c45:6100 PC Camera (SN9C128)
272sonixj 0c45:610a PC Camera (SN9C128)
273sonixj 0c45:610b PC Camera (SN9C128)
274sonixj 0c45:610c PC Camera (SN9C128)
275sonixj 0c45:610e PC Camera (SN9C128)
265sonixj 0c45:6128 Microdia/Sonix SNP325 276sonixj 0c45:6128 Microdia/Sonix SNP325
266sonixj 0c45:612a Avant Camera 277sonixj 0c45:612a Avant Camera
267sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix 278sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix
@@ -279,6 +290,7 @@ spca561 10fd:7e50 FlyCam Usb 100
279zc3xx 10fd:8050 Typhoon Webshot II USB 300k 290zc3xx 10fd:8050 Typhoon Webshot II USB 300k
280ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) 291ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201)
281pac207 145f:013a Trust WB-1300N 292pac207 145f:013a Trust WB-1300N
293vc032x 15b8:6001 HP 2.0 Megapixel
282vc032x 15b8:6002 HP 2.0 Megapixel rz406aa 294vc032x 15b8:6002 HP 2.0 Megapixel rz406aa
283spca501 1776:501c Arowana 300K CMOS Camera 295spca501 1776:501c Arowana 300K CMOS Camera
284t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops 296t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops
diff --git a/Documentation/video4linux/pxa_camera.txt b/Documentation/video4linux/pxa_camera.txt
new file mode 100644
index 000000000000..4f6d0ca01956
--- /dev/null
+++ b/Documentation/video4linux/pxa_camera.txt
@@ -0,0 +1,174 @@
1 PXA-Camera Host Driver
2 ======================
3
4Constraints
5-----------
6 a) Image size for YUV422P format
7 All YUV422P images are enforced to have width x height % 16 = 0.
8 This is due to DMA constraints, which transfers only planes of 8 byte
9 multiples.
10
11
12Global video workflow
13---------------------
14 a) QCI stopped
15 Initialy, the QCI interface is stopped.
16 When a buffer is queued (pxa_videobuf_ops->buf_queue), the QCI starts.
17
18 b) QCI started
19 More buffers can be queued while the QCI is started without halting the
20 capture. The new buffers are "appended" at the tail of the DMA chain, and
21 smoothly captured one frame after the other.
22
23 Once a buffer is filled in the QCI interface, it is marked as "DONE" and
24 removed from the active buffers list. It can be then requeud or dequeued by
25 userland application.
26
27 Once the last buffer is filled in, the QCI interface stops.
28
29 c) Capture global finite state machine schema
30
31 +----+ +---+ +----+
32 | DQ | | Q | | DQ |
33 | v | v | v
34 +-----------+ +------------------------+
35 | STOP | | Wait for capture start |
36 +-----------+ Q +------------------------+
37+-> | QCI: stop | ------------------> | QCI: run | <------------+
38| | DMA: stop | | DMA: stop | |
39| +-----------+ +-----> +------------------------+ |
40| / | |
41| / +---+ +----+ | |
42|capture list empty / | Q | | DQ | | QCI Irq EOF |
43| / | v | v v |
44| +--------------------+ +----------------------+ |
45| | DMA hotlink missed | | Capture running | |
46| +--------------------+ +----------------------+ |
47| | QCI: run | +-----> | QCI: run | <-+ |
48| | DMA: stop | / | DMA: run | | |
49| +--------------------+ / +----------------------+ | Other |
50| ^ /DMA still | | channels |
51| | capture list / running | DMA Irq End | not |
52| | not empty / | | finished |
53| | / v | yet |
54| +----------------------+ +----------------------+ | |
55| | Videobuf released | | Channel completed | | |
56| +----------------------+ +----------------------+ | |
57+-- | QCI: run | | QCI: run | --+ |
58 | DMA: run | | DMA: run | |
59 +----------------------+ +----------------------+ |
60 ^ / | |
61 | no overrun / | overrun |
62 | / v |
63 +--------------------+ / +----------------------+ |
64 | Frame completed | / | Frame overran | |
65 +--------------------+ <-----+ +----------------------+ restart frame |
66 | QCI: run | | QCI: stop | --------------+
67 | DMA: run | | DMA: stop |
68 +--------------------+ +----------------------+
69
70 Legend: - each box is a FSM state
71 - each arrow is the condition to transition to another state
72 - an arrow with a comment is a mandatory transition (no condition)
73 - arrow "Q" means : a buffer was enqueued
74 - arrow "DQ" means : a buffer was dequeued
75 - "QCI: stop" means the QCI interface is not enabled
76 - "DMA: stop" means all 3 DMA channels are stopped
77 - "DMA: run" means at least 1 DMA channel is still running
78
79DMA usage
80---------
81 a) DMA flow
82 - first buffer queued for capture
83 Once a first buffer is queued for capture, the QCI is started, but data
84 transfer is not started. On "End Of Frame" interrupt, the irq handler
85 starts the DMA chain.
86 - capture of one videobuffer
87 The DMA chain starts transfering data into videobuffer RAM pages.
88 When all pages are transfered, the DMA irq is raised on "ENDINTR" status
89 - finishing one videobuffer
90 The DMA irq handler marks the videobuffer as "done", and removes it from
91 the active running queue
92 Meanwhile, the next videobuffer (if there is one), is transfered by DMA
93 - finishing the last videobuffer
94 On the DMA irq of the last videobuffer, the QCI is stopped.
95
96 b) DMA prepared buffer will have this structure
97
98 +------------+-----+---------------+-----------------+
99 | desc-sg[0] | ... | desc-sg[last] | finisher/linker |
100 +------------+-----+---------------+-----------------+
101
102 This structure is pointed by dma->sg_cpu.
103 The descriptors are used as follows :
104 - desc-sg[i]: i-th descriptor, transfering the i-th sg
105 element to the video buffer scatter gather
106 - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN
107 - linker: has ddadr= desc-sg[0] of next video buffer, dcmd=0
108
109 For the next schema, let's assume d0=desc-sg[0] .. dN=desc-sg[N],
110 "f" stands for finisher and "l" for linker.
111 A typical running chain is :
112
113 Videobuffer 1 Videobuffer 2
114 +---------+----+---+ +----+----+----+---+
115 | d0 | .. | dN | l | | d0 | .. | dN | f |
116 +---------+----+-|-+ ^----+----+----+---+
117 | |
118 +----+
119
120 After the chaining is finished, the chain looks like :
121
122 Videobuffer 1 Videobuffer 2 Videobuffer 3
123 +---------+----+---+ +----+----+----+---+ +----+----+----+---+
124 | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
125 +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
126 | | | |
127 +----+ +----+
128 new_link
129
130 c) DMA hot chaining timeslice issue
131
132 As DMA chaining is done while DMA _is_ running, the linking may be done
133 while the DMA jumps from one Videobuffer to another. On the schema, that
134 would be a problem if the following sequence is encountered :
135
136 - DMA chain is Videobuffer1 + Videobuffer2
137 - pxa_videobuf_queue() is called to queue Videobuffer3
138 - DMA controller finishes Videobuffer2, and DMA stops
139 =>
140 Videobuffer 1 Videobuffer 2
141 +---------+----+---+ +----+----+----+---+
142 | d0 | .. | dN | l | | d0 | .. | dN | f |
143 +---------+----+-|-+ ^----+----+----+-^-+
144 | | |
145 +----+ +-- DMA DDADR loads DDADR_STOP
146
147 - pxa_dma_add_tail_buf() is called, the Videobuffer2 "finisher" is
148 replaced by a "linker" to Videobuffer3 (creation of new_link)
149 - pxa_videobuf_queue() finishes
150 - the DMA irq handler is called, which terminates Videobuffer2
151 - Videobuffer3 capture is not scheduled on DMA chain (as it stopped !!!)
152
153 Videobuffer 1 Videobuffer 2 Videobuffer 3
154 +---------+----+---+ +----+----+----+---+ +----+----+----+---+
155 | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
156 +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
157 | | | |
158 +----+ +----+
159 new_link
160 DMA DDADR still is DDADR_STOP
161
162 - pxa_camera_check_link_miss() is called
163 This checks if the DMA is finished and a buffer is still on the
164 pcdev->capture list. If that's the case, the capture will be restarted,
165 and Videobuffer3 is scheduled on DMA chain.
166 - the DMA irq handler finishes
167
168 Note: if DMA stops just after pxa_camera_check_link_miss() reads DDADR()
169 value, we have the guarantee that the DMA irq handler will be called back
170 when the DMA will finish the buffer, and pxa_camera_check_link_miss() will
171 be called again, to reschedule Videobuffer3.
172
173--
174Author: Robert Jarzmik <robert.jarzmik@free.fr>
diff --git a/Documentation/video4linux/si470x.txt b/Documentation/video4linux/si470x.txt
index 49679e6aaa76..3a7823e01b4d 100644
--- a/Documentation/video4linux/si470x.txt
+++ b/Documentation/video4linux/si470x.txt
@@ -1,6 +1,6 @@
1Driver for USB radios for the Silicon Labs Si470x FM Radio Receivers 1Driver for USB radios for the Silicon Labs Si470x FM Radio Receivers
2 2
3Copyright (c) 2008 Tobias Lorenz <tobias.lorenz@gmx.net> 3Copyright (c) 2009 Tobias Lorenz <tobias.lorenz@gmx.net>
4 4
5 5
6Information from Silicon Labs 6Information from Silicon Labs
@@ -41,7 +41,7 @@ chips are known to work:
41- 10c4:818a: Silicon Labs USB FM Radio Reference Design 41- 10c4:818a: Silicon Labs USB FM Radio Reference Design
42- 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF) 42- 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF)
43- 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700) 43- 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700)
44- 10c5:819a: DealExtreme USB Radio 44- 10c5:819a: Sanei Electric, Inc. FM USB Radio (sold as DealExtreme.com PCear)
45 45
46 46
47Software 47Software
@@ -52,6 +52,7 @@ Testing is usually done with most application under Debian/testing:
52- gradio - GTK FM radio tuner 52- gradio - GTK FM radio tuner
53- kradio - Comfortable Radio Application for KDE 53- kradio - Comfortable Radio Application for KDE
54- radio - ncurses-based radio application 54- radio - ncurses-based radio application
55- mplayer - The Ultimate Movie Player For Linux
55 56
56There is also a library libv4l, which can be used. It's going to have a function 57There is also a library libv4l, which can be used. It's going to have a function
57for frequency seeking, either by using hardware functionality as in radio-si470x 58for frequency seeking, either by using hardware functionality as in radio-si470x
@@ -69,7 +70,7 @@ Audio Listing
69USB Audio is provided by the ALSA snd_usb_audio module. It is recommended to 70USB Audio is provided by the ALSA snd_usb_audio module. It is recommended to
70also select SND_USB_AUDIO, as this is required to get sound from the radio. For 71also select SND_USB_AUDIO, as this is required to get sound from the radio. For
71listing you have to redirect the sound, for example using one of the following 72listing you have to redirect the sound, for example using one of the following
72commands. 73commands. Please adjust the audio devices to your needs (/dev/dsp* and hw:x,x).
73 74
74If you just want to test audio (very poor quality): 75If you just want to test audio (very poor quality):
75cat /dev/dsp1 > /dev/dsp 76cat /dev/dsp1 > /dev/dsp
@@ -80,6 +81,10 @@ sox -2 --endian little -r 96000 -t oss /dev/dsp1 -t oss /dev/dsp
80If you use arts try: 81If you use arts try:
81arecord -D hw:1,0 -r96000 -c2 -f S16_LE | artsdsp aplay -B - 82arecord -D hw:1,0 -r96000 -c2 -f S16_LE | artsdsp aplay -B -
82 83
84If you use mplayer try:
85mplayer -radio adevice=hw=1.0:arate=96000 \
86 -rawaudio rate=96000 \
87 radio://<frequency>/capture
83 88
84Module Parameters 89Module Parameters
85================= 90=================
diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index ff124374e9ba..d54c1e4c6a9c 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -47,7 +47,9 @@ All drivers have the following structure:
473) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and 473) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX, /dev/radioX and
48 /dev/vtxX) and keeping track of device-node specific data. 48 /dev/vtxX) and keeping track of device-node specific data.
49 49
504) Filehandle-specific structs containing per-filehandle data. 504) Filehandle-specific structs containing per-filehandle data;
51
525) video buffer handling.
51 53
52This is a rough schematic of how it all relates: 54This is a rough schematic of how it all relates:
53 55
@@ -82,12 +84,25 @@ You must register the device instance:
82 v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev); 84 v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
83 85
84Registration will initialize the v4l2_device struct and link dev->driver_data 86Registration will initialize the v4l2_device struct and link dev->driver_data
85to v4l2_dev. Registration will also set v4l2_dev->name to a value derived from 87to v4l2_dev. If v4l2_dev->name is empty then it will be set to a value derived
86dev (driver name followed by the bus_id, to be precise). You may change the 88from dev (driver name followed by the bus_id, to be precise). If you set it
87name after registration if you want. 89up before calling v4l2_device_register then it will be untouched. If dev is
90NULL, then you *must* setup v4l2_dev->name before calling v4l2_device_register.
91
92You can use v4l2_device_set_name() to set the name based on a driver name and
93a driver-global atomic_t instance. This will generate names like ivtv0, ivtv1,
94etc. If the name ends with a digit, then it will insert a dash: cx18-0,
95cx18-1, etc. This function returns the instance number.
88 96
89The first 'dev' argument is normally the struct device pointer of a pci_dev, 97The first 'dev' argument is normally the struct device pointer of a pci_dev,
90usb_device or platform_device. 98usb_interface or platform_device. It is rare for dev to be NULL, but it happens
99with ISA devices or when one device creates multiple PCI devices, thus making
100it impossible to associate v4l2_dev with a particular parent.
101
102You can also supply a notify() callback that can be called by sub-devices to
103notify you of events. Whether you need to set this depends on the sub-device.
104Any notifications a sub-device supports must be defined in a header in
105include/media/<subdevice>.h.
91 106
92You unregister with: 107You unregister with:
93 108
@@ -95,6 +110,17 @@ You unregister with:
95 110
96Unregistering will also automatically unregister all subdevs from the device. 111Unregistering will also automatically unregister all subdevs from the device.
97 112
113If you have a hotpluggable device (e.g. a USB device), then when a disconnect
114happens the parent device becomes invalid. Since v4l2_device has a pointer to
115that parent device it has to be cleared as well to mark that the parent is
116gone. To do this call:
117
118 v4l2_device_disconnect(struct v4l2_device *v4l2_dev);
119
120This does *not* unregister the subdevs, so you still need to call the
121v4l2_device_unregister() function for that. If your driver is not hotpluggable,
122then there is no need to call v4l2_device_disconnect().
123
98Sometimes you need to iterate over all devices registered by a specific 124Sometimes you need to iterate over all devices registered by a specific
99driver. This is usually the case if multiple device drivers use the same 125driver. This is usually the case if multiple device drivers use the same
100hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv 126hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv
@@ -134,7 +160,7 @@ The recommended approach is as follows:
134 160
135static atomic_t drv_instance = ATOMIC_INIT(0); 161static atomic_t drv_instance = ATOMIC_INIT(0);
136 162
137static int __devinit drv_probe(struct pci_dev *dev, 163static int __devinit drv_probe(struct pci_dev *pdev,
138 const struct pci_device_id *pci_id) 164 const struct pci_device_id *pci_id)
139{ 165{
140 ... 166 ...
@@ -218,7 +244,7 @@ to add new ops and categories.
218 244
219A sub-device driver initializes the v4l2_subdev struct using: 245A sub-device driver initializes the v4l2_subdev struct using:
220 246
221 v4l2_subdev_init(subdev, &ops); 247 v4l2_subdev_init(sd, &ops);
222 248
223Afterwards you need to initialize subdev->name with a unique name and set the 249Afterwards you need to initialize subdev->name with a unique name and set the
224module owner. This is done for you if you use the i2c helper functions. 250module owner. This is done for you if you use the i2c helper functions.
@@ -226,7 +252,7 @@ module owner. This is done for you if you use the i2c helper functions.
226A device (bridge) driver needs to register the v4l2_subdev with the 252A device (bridge) driver needs to register the v4l2_subdev with the
227v4l2_device: 253v4l2_device:
228 254
229 int err = v4l2_device_register_subdev(device, subdev); 255 int err = v4l2_device_register_subdev(v4l2_dev, sd);
230 256
231This can fail if the subdev module disappeared before it could be registered. 257This can fail if the subdev module disappeared before it could be registered.
232After this function was called successfully the subdev->dev field points to 258After this function was called successfully the subdev->dev field points to
@@ -234,17 +260,17 @@ the v4l2_device.
234 260
235You can unregister a sub-device using: 261You can unregister a sub-device using:
236 262
237 v4l2_device_unregister_subdev(subdev); 263 v4l2_device_unregister_subdev(sd);
238 264
239Afterwards the subdev module can be unloaded and subdev->dev == NULL. 265Afterwards the subdev module can be unloaded and sd->dev == NULL.
240 266
241You can call an ops function either directly: 267You can call an ops function either directly:
242 268
243 err = subdev->ops->core->g_chip_ident(subdev, &chip); 269 err = sd->ops->core->g_chip_ident(sd, &chip);
244 270
245but it is better and easier to use this macro: 271but it is better and easier to use this macro:
246 272
247 err = v4l2_subdev_call(subdev, core, g_chip_ident, &chip); 273 err = v4l2_subdev_call(sd, core, g_chip_ident, &chip);
248 274
249The macro will to the right NULL pointer checks and returns -ENODEV if subdev 275The macro will to the right NULL pointer checks and returns -ENODEV if subdev
250is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_chip_ident is 276is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_chip_ident is
@@ -252,19 +278,19 @@ NULL, or the actual result of the subdev->ops->core->g_chip_ident ops.
252 278
253It is also possible to call all or a subset of the sub-devices: 279It is also possible to call all or a subset of the sub-devices:
254 280
255 v4l2_device_call_all(dev, 0, core, g_chip_ident, &chip); 281 v4l2_device_call_all(v4l2_dev, 0, core, g_chip_ident, &chip);
256 282
257Any subdev that does not support this ops is skipped and error results are 283Any subdev that does not support this ops is skipped and error results are
258ignored. If you want to check for errors use this: 284ignored. If you want to check for errors use this:
259 285
260 err = v4l2_device_call_until_err(dev, 0, core, g_chip_ident, &chip); 286 err = v4l2_device_call_until_err(v4l2_dev, 0, core, g_chip_ident, &chip);
261 287
262Any error except -ENOIOCTLCMD will exit the loop with that error. If no 288Any error except -ENOIOCTLCMD will exit the loop with that error. If no
263errors (except -ENOIOCTLCMD) occured, then 0 is returned. 289errors (except -ENOIOCTLCMD) occured, then 0 is returned.
264 290
265The second argument to both calls is a group ID. If 0, then all subdevs are 291The second argument to both calls is a group ID. If 0, then all subdevs are
266called. If non-zero, then only those whose group ID match that value will 292called. If non-zero, then only those whose group ID match that value will
267be called. Before a bridge driver registers a subdev it can set subdev->grp_id 293be called. Before a bridge driver registers a subdev it can set sd->grp_id
268to whatever value it wants (it's 0 by default). This value is owned by the 294to whatever value it wants (it's 0 by default). This value is owned by the
269bridge driver and the sub-device driver will never modify or use it. 295bridge driver and the sub-device driver will never modify or use it.
270 296
@@ -276,6 +302,11 @@ e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling
276v4l2_device_call_all(). That ensures that it will only go to the subdev 302v4l2_device_call_all(). That ensures that it will only go to the subdev
277that needs it. 303that needs it.
278 304
305If the sub-device needs to notify its v4l2_device parent of an event, then
306it can call v4l2_subdev_notify(sd, notification, arg). This macro checks
307whether there is a notify() callback defined and returns -ENODEV if not.
308Otherwise the result of the notify() call is returned.
309
279The advantage of using v4l2_subdev is that it is a generic struct and does 310The advantage of using v4l2_subdev is that it is a generic struct and does
280not contain any knowledge about the underlying hardware. So a driver might 311not contain any knowledge about the underlying hardware. So a driver might
281contain several subdevs that use an I2C bus, but also a subdev that is 312contain several subdevs that use an I2C bus, but also a subdev that is
@@ -325,32 +356,25 @@ And this to go from an i2c_client to a v4l2_subdev struct:
325 356
326 struct v4l2_subdev *sd = i2c_get_clientdata(client); 357 struct v4l2_subdev *sd = i2c_get_clientdata(client);
327 358
328Finally you need to make a command function to make driver->command()
329call the right subdev_ops functions:
330
331static int subdev_command(struct i2c_client *client, unsigned cmd, void *arg)
332{
333 return v4l2_subdev_command(i2c_get_clientdata(client), cmd, arg);
334}
335
336If driver->command is never used then you can leave this out. Eventually the
337driver->command usage should be removed from v4l.
338
339Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback 359Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback
340is called. This will unregister the sub-device from the bridge driver. It is 360is called. This will unregister the sub-device from the bridge driver. It is
341safe to call this even if the sub-device was never registered. 361safe to call this even if the sub-device was never registered.
342 362
363You need to do this because when the bridge driver destroys the i2c adapter
364the remove() callbacks are called of the i2c devices on that adapter.
365After that the corresponding v4l2_subdev structures are invalid, so they
366have to be unregistered first. Calling v4l2_device_unregister_subdev(sd)
367from the remove() callback ensures that this is always done correctly.
368
343 369
344The bridge driver also has some helper functions it can use: 370The bridge driver also has some helper functions it can use:
345 371
346struct v4l2_subdev *sd = v4l2_i2c_new_subdev(adapter, "module_foo", "chipid", 0x36); 372struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter,
373 "module_foo", "chipid", 0x36);
347 374
348This loads the given module (can be NULL if no module needs to be loaded) and 375This loads the given module (can be NULL if no module needs to be loaded) and
349calls i2c_new_device() with the given i2c_adapter and chip/address arguments. 376calls i2c_new_device() with the given i2c_adapter and chip/address arguments.
350If all goes well, then it registers the subdev with the v4l2_device. It gets 377If all goes well, then it registers the subdev with the v4l2_device.
351the v4l2_device by calling i2c_get_adapdata(adapter), so you should make sure
352that adapdata is set to v4l2_device when you setup the i2c_adapter in your
353driver.
354 378
355You can also use v4l2_i2c_new_probed_subdev() which is very similar to 379You can also use v4l2_i2c_new_probed_subdev() which is very similar to
356v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses 380v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses
@@ -358,6 +382,14 @@ that it should probe. Internally it calls i2c_new_probed_device().
358 382
359Both functions return NULL if something went wrong. 383Both functions return NULL if something went wrong.
360 384
385Note that the chipid you pass to v4l2_i2c_new_(probed_)subdev() is usually
386the same as the module name. It allows you to specify a chip variant, e.g.
387"saa7114" or "saa7115". In general though the i2c driver autodetects this.
388The use of chipid is something that needs to be looked at more closely at a
389later date. It differs between i2c drivers and as such can be confusing.
390To see which chip variants are supported you can look in the i2c driver code
391for the i2c_device_id table. This lists all the possibilities.
392
361 393
362struct video_device 394struct video_device
363------------------- 395-------------------
@@ -396,6 +428,15 @@ You should also set these fields:
396- ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance 428- ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance
397 (highly recommended to use this and it might become compulsory in the 429 (highly recommended to use this and it might become compulsory in the
398 future!), then set this to your v4l2_ioctl_ops struct. 430 future!), then set this to your v4l2_ioctl_ops struct.
431- parent: you only set this if v4l2_device was registered with NULL as
432 the parent device struct. This only happens in cases where one hardware
433 device has multiple PCI devices that all share the same v4l2_device core.
434
435 The cx88 driver is an example of this: one core v4l2_device struct, but
436 it is used by both an raw video PCI device (cx8800) and a MPEG PCI device
437 (cx8802). Since the v4l2_device cannot be associated with a particular
438 PCI device it is setup without a parent device. But when the struct
439 video_device is setup you do know which parent PCI device to use.
399 440
400If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or 441If you use v4l2_ioctl_ops, then you should set either .unlocked_ioctl or
401.ioctl to video_ioctl2 in your v4l2_file_operations struct. 442.ioctl to video_ioctl2 in your v4l2_file_operations struct.
@@ -499,8 +540,8 @@ There are a few useful helper functions:
499 540
500You can set/get driver private data in the video_device struct using: 541You can set/get driver private data in the video_device struct using:
501 542
502void *video_get_drvdata(struct video_device *dev); 543void *video_get_drvdata(struct video_device *vdev);
503void video_set_drvdata(struct video_device *dev, void *data); 544void video_set_drvdata(struct video_device *vdev, void *data);
504 545
505Note that you can safely call video_set_drvdata() before calling 546Note that you can safely call video_set_drvdata() before calling
506video_register_device(). 547video_register_device().
@@ -519,3 +560,103 @@ void *video_drvdata(struct file *file);
519You can go from a video_device struct to the v4l2_device struct using: 560You can go from a video_device struct to the v4l2_device struct using:
520 561
521struct v4l2_device *v4l2_dev = vdev->v4l2_dev; 562struct v4l2_device *v4l2_dev = vdev->v4l2_dev;
563
564video buffer helper functions
565-----------------------------
566
567The v4l2 core API provides a standard method for dealing with video
568buffers. Those methods allow a driver to implement read(), mmap() and
569overlay() on a consistent way.
570
571There are currently methods for using video buffers on devices that
572supports DMA with scatter/gather method (videobuf-dma-sg), DMA with
573linear access (videobuf-dma-contig), and vmalloced buffers, mostly
574used on USB drivers (videobuf-vmalloc).
575
576Any driver using videobuf should provide operations (callbacks) for
577four handlers:
578
579ops->buf_setup - calculates the size of the video buffers and avoid they
580 to waste more than some maximum limit of RAM;
581ops->buf_prepare - fills the video buffer structs and calls
582 videobuf_iolock() to alloc and prepare mmaped memory;
583ops->buf_queue - advices the driver that another buffer were
584 requested (by read() or by QBUF);
585ops->buf_release - frees any buffer that were allocated.
586
587In order to use it, the driver need to have a code (generally called at
588interrupt context) that will properly handle the buffer request lists,
589announcing that a new buffer were filled.
590
591The irq handling code should handle the videobuf task lists, in order
592to advice videobuf that a new frame were filled, in order to honor to a
593request. The code is generally like this one:
594 if (list_empty(&dma_q->active))
595 return;
596
597 buf = list_entry(dma_q->active.next, struct vbuffer, vb.queue);
598
599 if (!waitqueue_active(&buf->vb.done))
600 return;
601
602 /* Some logic to handle the buf may be needed here */
603
604 list_del(&buf->vb.queue);
605 do_gettimeofday(&buf->vb.ts);
606 wake_up(&buf->vb.done);
607
608Those are the videobuffer functions used on drivers, implemented on
609videobuf-core:
610
611- Videobuf init functions
612 videobuf_queue_sg_init()
613 Initializes the videobuf infrastructure. This function should be
614 called before any other videobuf function on drivers that uses DMA
615 Scatter/Gather buffers.
616
617 videobuf_queue_dma_contig_init
618 Initializes the videobuf infrastructure. This function should be
619 called before any other videobuf function on drivers that need DMA
620 contiguous buffers.
621
622 videobuf_queue_vmalloc_init()
623 Initializes the videobuf infrastructure. This function should be
624 called before any other videobuf function on USB (and other drivers)
625 that need a vmalloced type of videobuf.
626
627- videobuf_iolock()
628 Prepares the videobuf memory for the proper method (read, mmap, overlay).
629
630- videobuf_queue_is_busy()
631 Checks if a videobuf is streaming.
632
633- videobuf_queue_cancel()
634 Stops video handling.
635
636- videobuf_mmap_free()
637 frees mmap buffers.
638
639- videobuf_stop()
640 Stops video handling, ends mmap and frees mmap and other buffers.
641
642- V4L2 api functions. Those functions correspond to VIDIOC_foo ioctls:
643 videobuf_reqbufs(), videobuf_querybuf(), videobuf_qbuf(),
644 videobuf_dqbuf(), videobuf_streamon(), videobuf_streamoff().
645
646- V4L1 api function (corresponds to VIDIOCMBUF ioctl):
647 videobuf_cgmbuf()
648 This function is used to provide backward compatibility with V4L1
649 API.
650
651- Some help functions for read()/poll() operations:
652 videobuf_read_stream()
653 For continuous stream read()
654 videobuf_read_one()
655 For snapshot read()
656 videobuf_poll_stream()
657 polling help function
658
659The better way to understand it is to take a look at vivi driver. One
660of the main reasons for vivi is to be a videobuf usage example. the
661vivi_thread_tick() does the task that the IRQ callback would do on PCI
662drivers (or the irq callback on USB).
diff --git a/Documentation/video4linux/v4lgrab.c b/Documentation/video4linux/v4lgrab.c
index d6e70bef8ad0..05769cff1009 100644
--- a/Documentation/video4linux/v4lgrab.c
+++ b/Documentation/video4linux/v4lgrab.c
@@ -105,8 +105,8 @@ int main(int argc, char ** argv)
105 struct video_picture vpic; 105 struct video_picture vpic;
106 106
107 unsigned char *buffer, *src; 107 unsigned char *buffer, *src;
108 int bpp = 24, r, g, b; 108 int bpp = 24, r = 0, g = 0, b = 0;
109 unsigned int i, src_depth; 109 unsigned int i, src_depth = 16;
110 110
111 if (fd < 0) { 111 if (fd < 0) {
112 perror(VIDEO_DEV); 112 perror(VIDEO_DEV);
diff --git a/Documentation/video4linux/zr364xx.txt b/Documentation/video4linux/zr364xx.txt
index 5c81e3ae6458..7f3d1955d214 100644
--- a/Documentation/video4linux/zr364xx.txt
+++ b/Documentation/video4linux/zr364xx.txt
@@ -65,3 +65,4 @@ Vendor Product Distributor Model
650x06d6 0x003b Trust Powerc@m 970Z 650x06d6 0x003b Trust Powerc@m 970Z
660x0a17 0x004e Pentax Optio 50 660x0a17 0x004e Pentax Optio 50
670x041e 0x405d Creative DiVi CAM 516 670x041e 0x405d Creative DiVi CAM 516
680x08ca 0x2102 Aiptek DV T300
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 2131b00b63f6..2f77ced35df7 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -1,5 +1,7 @@
100-INDEX 100-INDEX
2 - this file. 2 - this file.
3active_mm.txt
4 - An explanation from Linus about tsk->active_mm vs tsk->mm.
3balance 5balance
4 - various information on memory balancing. 6 - various information on memory balancing.
5hugetlbpage.txt 7hugetlbpage.txt
diff --git a/Documentation/vm/Makefile b/Documentation/vm/Makefile
index 6f562f778b28..5bd269b3731a 100644
--- a/Documentation/vm/Makefile
+++ b/Documentation/vm/Makefile
@@ -2,7 +2,7 @@
2obj- := dummy.o 2obj- := dummy.o
3 3
4# List of programs to build 4# List of programs to build
5hostprogs-y := slabinfo 5hostprogs-y := slabinfo page-types
6 6
7# Tell kbuild to always build the programs 7# Tell kbuild to always build the programs
8always := $(hostprogs-y) 8always := $(hostprogs-y)
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt
new file mode 100644
index 000000000000..4ee1f643d897
--- /dev/null
+++ b/Documentation/vm/active_mm.txt
@@ -0,0 +1,83 @@
1List: linux-kernel
2Subject: Re: active_mm
3From: Linus Torvalds <torvalds () transmeta ! com>
4Date: 1999-07-30 21:36:24
5
6Cc'd to linux-kernel, because I don't write explanations all that often,
7and when I do I feel better about more people reading them.
8
9On Fri, 30 Jul 1999, David Mosberger wrote:
10>
11> Is there a brief description someplace on how "mm" vs. "active_mm" in
12> the task_struct are supposed to be used? (My apologies if this was
13> discussed on the mailing lists---I just returned from vacation and
14> wasn't able to follow linux-kernel for a while).
15
16Basically, the new setup is:
17
18 - we have "real address spaces" and "anonymous address spaces". The
19 difference is that an anonymous address space doesn't care about the
20 user-level page tables at all, so when we do a context switch into an
21 anonymous address space we just leave the previous address space
22 active.
23
24 The obvious use for a "anonymous address space" is any thread that
25 doesn't need any user mappings - all kernel threads basically fall into
26 this category, but even "real" threads can temporarily say that for
27 some amount of time they are not going to be interested in user space,
28 and that the scheduler might as well try to avoid wasting time on
29 switching the VM state around. Currently only the old-style bdflush
30 sync does that.
31
32 - "tsk->mm" points to the "real address space". For an anonymous process,
33 tsk->mm will be NULL, for the logical reason that an anonymous process
34 really doesn't _have_ a real address space at all.
35
36 - however, we obviously need to keep track of which address space we
37 "stole" for such an anonymous user. For that, we have "tsk->active_mm",
38 which shows what the currently active address space is.
39
40 The rule is that for a process with a real address space (ie tsk->mm is
41 non-NULL) the active_mm obviously always has to be the same as the real
42 one.
43
44 For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
45 "borrowed" mm while the anonymous process is running. When the
46 anonymous process gets scheduled away, the borrowed address space is
47 returned and cleared.
48
49To support all that, the "struct mm_struct" now has two counters: a
50"mm_users" counter that is how many "real address space users" there are,
51and a "mm_count" counter that is the number of "lazy" users (ie anonymous
52users) plus one if there are any real users.
53
54Usually there is at least one real user, but it could be that the real
55user exited on another CPU while a lazy user was still active, so you do
56actually get cases where you have a address space that is _only_ used by
57lazy users. That is often a short-lived state, because once that thread
58gets scheduled away in favour of a real thread, the "zombie" mm gets
59released because "mm_users" becomes zero.
60
61Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
62more. "init_mm" should be considered just a "lazy context when no other
63context is available", and in fact it is mainly used just at bootup when
64no real VM has yet been created. So code that used to check
65
66 if (current->mm == &init_mm)
67
68should generally just do
69
70 if (!current->mm)
71
72instead (which makes more sense anyway - the test is basically one of "do
73we have a user context", and is generally done by the page fault handler
74and things like that).
75
76Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
77because it slightly changes the interfaces to accomodate the alpha (who
78would have thought it, but the alpha actually ends up having one of the
79ugliest context switch codes - unlike the other architectures where the MM
80and register state is separate, the alpha PALcode joins the two, and you
81need to switch both together).
82
83(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
index bd3d31bc4915..c46e68cf9344 100644
--- a/Documentation/vm/balance
+++ b/Documentation/vm/balance
@@ -75,15 +75,15 @@ Page stealing from process memory and shm is done if stealing the page would
75alleviate memory pressure on any zone in the page's node that has fallen below 75alleviate memory pressure on any zone in the page's node that has fallen below
76its watermark. 76its watermark.
77 77
78pages_min/pages_low/pages_high/low_on_memory/zone_wake_kswapd: These are 78watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These
79per-zone fields, used to determine when a zone needs to be balanced. When 79are per-zone fields, used to determine when a zone needs to be balanced. When
80the number of pages falls below pages_min, the hysteric field low_on_memory 80the number of pages falls below watermark[WMARK_MIN], the hysteric field
81gets set. This stays set till the number of free pages becomes pages_high. 81low_on_memory gets set. This stays set till the number of free pages becomes
82When low_on_memory is set, page allocation requests will try to free some 82watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will
83pages in the zone (providing GFP_WAIT is set in the request). Orthogonal 83try to free some pages in the zone (providing GFP_WAIT is set in the request).
84to this, is the decision to poke kswapd to free some zone pages. That 84Orthogonal to this, is the decision to poke kswapd to free some zone pages.
85decision is not hysteresis based, and is done when the number of free 85That decision is not hysteresis based, and is done when the number of free
86pages is below pages_low; in which case zone_wake_kswapd is also set. 86pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
87 87
88 88
89(Good) Ideas that I have heard: 89(Good) Ideas that I have heard:
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 6aaaeb38730c..be45dbb9d7f2 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -8,7 +8,8 @@ The current memory policy support was added to Linux 2.6 around May 2004. This
8document attempts to describe the concepts and APIs of the 2.6 memory policy 8document attempts to describe the concepts and APIs of the 2.6 memory policy
9support. 9support.
10 10
11Memory policies should not be confused with cpusets (Documentation/cpusets.txt) 11Memory policies should not be confused with cpusets
12(Documentation/cgroups/cpusets.txt)
12which is an administrative mechanism for restricting the nodes from which 13which is an administrative mechanism for restricting the nodes from which
13memory may be allocated by a set of processes. Memory policies are a 14memory may be allocated by a set of processes. Memory policies are a
14programming interface that a NUMA-aware application can take advantage of. When 15programming interface that a NUMA-aware application can take advantage of. When
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
new file mode 100644
index 000000000000..0833f44ba16b
--- /dev/null
+++ b/Documentation/vm/page-types.c
@@ -0,0 +1,698 @@
1/*
2 * page-types: Tool for querying page flags
3 *
4 * Copyright (C) 2009 Intel corporation
5 * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
6 */
7
8#include <stdio.h>
9#include <stdlib.h>
10#include <unistd.h>
11#include <stdint.h>
12#include <stdarg.h>
13#include <string.h>
14#include <getopt.h>
15#include <limits.h>
16#include <sys/types.h>
17#include <sys/errno.h>
18#include <sys/fcntl.h>
19
20
21/*
22 * kernel page flags
23 */
24
25#define KPF_BYTES 8
26#define PROC_KPAGEFLAGS "/proc/kpageflags"
27
28/* copied from kpageflags_read() */
29#define KPF_LOCKED 0
30#define KPF_ERROR 1
31#define KPF_REFERENCED 2
32#define KPF_UPTODATE 3
33#define KPF_DIRTY 4
34#define KPF_LRU 5
35#define KPF_ACTIVE 6
36#define KPF_SLAB 7
37#define KPF_WRITEBACK 8
38#define KPF_RECLAIM 9
39#define KPF_BUDDY 10
40
41/* [11-20] new additions in 2.6.31 */
42#define KPF_MMAP 11
43#define KPF_ANON 12
44#define KPF_SWAPCACHE 13
45#define KPF_SWAPBACKED 14
46#define KPF_COMPOUND_HEAD 15
47#define KPF_COMPOUND_TAIL 16
48#define KPF_HUGE 17
49#define KPF_UNEVICTABLE 18
50#define KPF_NOPAGE 20
51
52/* [32-] kernel hacking assistances */
53#define KPF_RESERVED 32
54#define KPF_MLOCKED 33
55#define KPF_MAPPEDTODISK 34
56#define KPF_PRIVATE 35
57#define KPF_PRIVATE_2 36
58#define KPF_OWNER_PRIVATE 37
59#define KPF_ARCH 38
60#define KPF_UNCACHED 39
61
62/* [48-] take some arbitrary free slots for expanding overloaded flags
63 * not part of kernel API
64 */
65#define KPF_READAHEAD 48
66#define KPF_SLOB_FREE 49
67#define KPF_SLUB_FROZEN 50
68#define KPF_SLUB_DEBUG 51
69
70#define KPF_ALL_BITS ((uint64_t)~0ULL)
71#define KPF_HACKERS_BITS (0xffffULL << 32)
72#define KPF_OVERLOADED_BITS (0xffffULL << 48)
73#define BIT(name) (1ULL << KPF_##name)
74#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
75
76static char *page_flag_names[] = {
77 [KPF_LOCKED] = "L:locked",
78 [KPF_ERROR] = "E:error",
79 [KPF_REFERENCED] = "R:referenced",
80 [KPF_UPTODATE] = "U:uptodate",
81 [KPF_DIRTY] = "D:dirty",
82 [KPF_LRU] = "l:lru",
83 [KPF_ACTIVE] = "A:active",
84 [KPF_SLAB] = "S:slab",
85 [KPF_WRITEBACK] = "W:writeback",
86 [KPF_RECLAIM] = "I:reclaim",
87 [KPF_BUDDY] = "B:buddy",
88
89 [KPF_MMAP] = "M:mmap",
90 [KPF_ANON] = "a:anonymous",
91 [KPF_SWAPCACHE] = "s:swapcache",
92 [KPF_SWAPBACKED] = "b:swapbacked",
93 [KPF_COMPOUND_HEAD] = "H:compound_head",
94 [KPF_COMPOUND_TAIL] = "T:compound_tail",
95 [KPF_HUGE] = "G:huge",
96 [KPF_UNEVICTABLE] = "u:unevictable",
97 [KPF_NOPAGE] = "n:nopage",
98
99 [KPF_RESERVED] = "r:reserved",
100 [KPF_MLOCKED] = "m:mlocked",
101 [KPF_MAPPEDTODISK] = "d:mappedtodisk",
102 [KPF_PRIVATE] = "P:private",
103 [KPF_PRIVATE_2] = "p:private_2",
104 [KPF_OWNER_PRIVATE] = "O:owner_private",
105 [KPF_ARCH] = "h:arch",
106 [KPF_UNCACHED] = "c:uncached",
107
108 [KPF_READAHEAD] = "I:readahead",
109 [KPF_SLOB_FREE] = "P:slob_free",
110 [KPF_SLUB_FROZEN] = "A:slub_frozen",
111 [KPF_SLUB_DEBUG] = "E:slub_debug",
112};
113
114
115/*
116 * data structures
117 */
118
119static int opt_raw; /* for kernel developers */
120static int opt_list; /* list pages (in ranges) */
121static int opt_no_summary; /* don't show summary */
122static pid_t opt_pid; /* process to walk */
123
124#define MAX_ADDR_RANGES 1024
125static int nr_addr_ranges;
126static unsigned long opt_offset[MAX_ADDR_RANGES];
127static unsigned long opt_size[MAX_ADDR_RANGES];
128
129#define MAX_BIT_FILTERS 64
130static int nr_bit_filters;
131static uint64_t opt_mask[MAX_BIT_FILTERS];
132static uint64_t opt_bits[MAX_BIT_FILTERS];
133
134static int page_size;
135
136#define PAGES_BATCH (64 << 10) /* 64k pages */
137static int kpageflags_fd;
138static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
139
140#define HASH_SHIFT 13
141#define HASH_SIZE (1 << HASH_SHIFT)
142#define HASH_MASK (HASH_SIZE - 1)
143#define HASH_KEY(flags) (flags & HASH_MASK)
144
145static unsigned long total_pages;
146static unsigned long nr_pages[HASH_SIZE];
147static uint64_t page_flags[HASH_SIZE];
148
149
150/*
151 * helper functions
152 */
153
154#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
155
156#define min_t(type, x, y) ({ \
157 type __min1 = (x); \
158 type __min2 = (y); \
159 __min1 < __min2 ? __min1 : __min2; })
160
161unsigned long pages2mb(unsigned long pages)
162{
163 return (pages * page_size) >> 20;
164}
165
166void fatal(const char *x, ...)
167{
168 va_list ap;
169
170 va_start(ap, x);
171 vfprintf(stderr, x, ap);
172 va_end(ap);
173 exit(EXIT_FAILURE);
174}
175
176
177/*
178 * page flag names
179 */
180
181char *page_flag_name(uint64_t flags)
182{
183 static char buf[65];
184 int present;
185 int i, j;
186
187 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
188 present = (flags >> i) & 1;
189 if (!page_flag_names[i]) {
190 if (present)
191 fatal("unkown flag bit %d\n", i);
192 continue;
193 }
194 buf[j++] = present ? page_flag_names[i][0] : '_';
195 }
196
197 return buf;
198}
199
200char *page_flag_longname(uint64_t flags)
201{
202 static char buf[1024];
203 int i, n;
204
205 for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
206 if (!page_flag_names[i])
207 continue;
208 if ((flags >> i) & 1)
209 n += snprintf(buf + n, sizeof(buf) - n, "%s,",
210 page_flag_names[i] + 2);
211 }
212 if (n)
213 n--;
214 buf[n] = '\0';
215
216 return buf;
217}
218
219
220/*
221 * page list and summary
222 */
223
224void show_page_range(unsigned long offset, uint64_t flags)
225{
226 static uint64_t flags0;
227 static unsigned long index;
228 static unsigned long count;
229
230 if (flags == flags0 && offset == index + count) {
231 count++;
232 return;
233 }
234
235 if (count)
236 printf("%lu\t%lu\t%s\n",
237 index, count, page_flag_name(flags0));
238
239 flags0 = flags;
240 index = offset;
241 count = 1;
242}
243
244void show_page(unsigned long offset, uint64_t flags)
245{
246 printf("%lu\t%s\n", offset, page_flag_name(flags));
247}
248
249void show_summary(void)
250{
251 int i;
252
253 printf(" flags\tpage-count MB"
254 " symbolic-flags\t\t\tlong-symbolic-flags\n");
255
256 for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
257 if (nr_pages[i])
258 printf("0x%016llx\t%10lu %8lu %s\t%s\n",
259 (unsigned long long)page_flags[i],
260 nr_pages[i],
261 pages2mb(nr_pages[i]),
262 page_flag_name(page_flags[i]),
263 page_flag_longname(page_flags[i]));
264 }
265
266 printf(" total\t%10lu %8lu\n",
267 total_pages, pages2mb(total_pages));
268}
269
270
271/*
272 * page flag filters
273 */
274
275int bit_mask_ok(uint64_t flags)
276{
277 int i;
278
279 for (i = 0; i < nr_bit_filters; i++) {
280 if (opt_bits[i] == KPF_ALL_BITS) {
281 if ((flags & opt_mask[i]) == 0)
282 return 0;
283 } else {
284 if ((flags & opt_mask[i]) != opt_bits[i])
285 return 0;
286 }
287 }
288
289 return 1;
290}
291
292uint64_t expand_overloaded_flags(uint64_t flags)
293{
294 /* SLOB/SLUB overload several page flags */
295 if (flags & BIT(SLAB)) {
296 if (flags & BIT(PRIVATE))
297 flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
298 if (flags & BIT(ACTIVE))
299 flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
300 if (flags & BIT(ERROR))
301 flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
302 }
303
304 /* PG_reclaim is overloaded as PG_readahead in the read path */
305 if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
306 flags ^= BIT(RECLAIM) | BIT(READAHEAD);
307
308 return flags;
309}
310
311uint64_t well_known_flags(uint64_t flags)
312{
313 /* hide flags intended only for kernel hacker */
314 flags &= ~KPF_HACKERS_BITS;
315
316 /* hide non-hugeTLB compound pages */
317 if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
318 flags &= ~BITS_COMPOUND;
319
320 return flags;
321}
322
323
324/*
325 * page frame walker
326 */
327
328int hash_slot(uint64_t flags)
329{
330 int k = HASH_KEY(flags);
331 int i;
332
333 /* Explicitly reserve slot 0 for flags 0: the following logic
334 * cannot distinguish an unoccupied slot from slot (flags==0).
335 */
336 if (flags == 0)
337 return 0;
338
339 /* search through the remaining (HASH_SIZE-1) slots */
340 for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
341 if (!k || k >= ARRAY_SIZE(page_flags))
342 k = 1;
343 if (page_flags[k] == 0) {
344 page_flags[k] = flags;
345 return k;
346 }
347 if (page_flags[k] == flags)
348 return k;
349 }
350
351 fatal("hash table full: bump up HASH_SHIFT?\n");
352 exit(EXIT_FAILURE);
353}
354
355void add_page(unsigned long offset, uint64_t flags)
356{
357 flags = expand_overloaded_flags(flags);
358
359 if (!opt_raw)
360 flags = well_known_flags(flags);
361
362 if (!bit_mask_ok(flags))
363 return;
364
365 if (opt_list == 1)
366 show_page_range(offset, flags);
367 else if (opt_list == 2)
368 show_page(offset, flags);
369
370 nr_pages[hash_slot(flags)]++;
371 total_pages++;
372}
373
374void walk_pfn(unsigned long index, unsigned long count)
375{
376 unsigned long batch;
377 unsigned long n;
378 unsigned long i;
379
380 if (index > ULONG_MAX / KPF_BYTES)
381 fatal("index overflow: %lu\n", index);
382
383 lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
384
385 while (count) {
386 batch = min_t(unsigned long, count, PAGES_BATCH);
387 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
388 if (n == 0)
389 break;
390 if (n < 0) {
391 perror(PROC_KPAGEFLAGS);
392 exit(EXIT_FAILURE);
393 }
394
395 if (n % KPF_BYTES != 0)
396 fatal("partial read: %lu bytes\n", n);
397 n = n / KPF_BYTES;
398
399 for (i = 0; i < n; i++)
400 add_page(index + i, kpageflags_buf[i]);
401
402 index += batch;
403 count -= batch;
404 }
405}
406
407void walk_addr_ranges(void)
408{
409 int i;
410
411 kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY);
412 if (kpageflags_fd < 0) {
413 perror(PROC_KPAGEFLAGS);
414 exit(EXIT_FAILURE);
415 }
416
417 if (!nr_addr_ranges)
418 walk_pfn(0, ULONG_MAX);
419
420 for (i = 0; i < nr_addr_ranges; i++)
421 walk_pfn(opt_offset[i], opt_size[i]);
422
423 close(kpageflags_fd);
424}
425
426
427/*
428 * user interface
429 */
430
431const char *page_flag_type(uint64_t flag)
432{
433 if (flag & KPF_HACKERS_BITS)
434 return "(r)";
435 if (flag & KPF_OVERLOADED_BITS)
436 return "(o)";
437 return " ";
438}
439
440void usage(void)
441{
442 int i, j;
443
444 printf(
445"page-types [options]\n"
446" -r|--raw Raw mode, for kernel developers\n"
447" -a|--addr addr-spec Walk a range of pages\n"
448" -b|--bits bits-spec Walk pages with specified bits\n"
449#if 0 /* planned features */
450" -p|--pid pid Walk process address space\n"
451" -f|--file filename Walk file address space\n"
452#endif
453" -l|--list Show page details in ranges\n"
454" -L|--list-each Show page details one by one\n"
455" -N|--no-summary Don't show summay info\n"
456" -h|--help Show this usage message\n"
457"addr-spec:\n"
458" N one page at offset N (unit: pages)\n"
459" N+M pages range from N to N+M-1\n"
460" N,M pages range from N to M-1\n"
461" N, pages range from N to end\n"
462" ,M pages range from 0 to M\n"
463"bits-spec:\n"
464" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
465" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
466" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n"
467" =bit1,bit2 flags == (bit1|bit2)\n"
468"bit-names:\n"
469 );
470
471 for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
472 if (!page_flag_names[i])
473 continue;
474 printf("%16s%s", page_flag_names[i] + 2,
475 page_flag_type(1ULL << i));
476 if (++j > 3) {
477 j = 0;
478 putchar('\n');
479 }
480 }
481 printf("\n "
482 "(r) raw mode bits (o) overloaded bits\n");
483}
484
485unsigned long long parse_number(const char *str)
486{
487 unsigned long long n;
488
489 n = strtoll(str, NULL, 0);
490
491 if (n == 0 && str[0] != '0')
492 fatal("invalid name or number: %s\n", str);
493
494 return n;
495}
496
497void parse_pid(const char *str)
498{
499 opt_pid = parse_number(str);
500}
501
502void parse_file(const char *name)
503{
504}
505
506void add_addr_range(unsigned long offset, unsigned long size)
507{
508 if (nr_addr_ranges >= MAX_ADDR_RANGES)
509 fatal("too much addr ranges\n");
510
511 opt_offset[nr_addr_ranges] = offset;
512 opt_size[nr_addr_ranges] = size;
513 nr_addr_ranges++;
514}
515
516void parse_addr_range(const char *optarg)
517{
518 unsigned long offset;
519 unsigned long size;
520 char *p;
521
522 p = strchr(optarg, ',');
523 if (!p)
524 p = strchr(optarg, '+');
525
526 if (p == optarg) {
527 offset = 0;
528 size = parse_number(p + 1);
529 } else if (p) {
530 offset = parse_number(optarg);
531 if (p[1] == '\0')
532 size = ULONG_MAX;
533 else {
534 size = parse_number(p + 1);
535 if (*p == ',') {
536 if (size < offset)
537 fatal("invalid range: %lu,%lu\n",
538 offset, size);
539 size -= offset;
540 }
541 }
542 } else {
543 offset = parse_number(optarg);
544 size = 1;
545 }
546
547 add_addr_range(offset, size);
548}
549
550void add_bits_filter(uint64_t mask, uint64_t bits)
551{
552 if (nr_bit_filters >= MAX_BIT_FILTERS)
553 fatal("too much bit filters\n");
554
555 opt_mask[nr_bit_filters] = mask;
556 opt_bits[nr_bit_filters] = bits;
557 nr_bit_filters++;
558}
559
560uint64_t parse_flag_name(const char *str, int len)
561{
562 int i;
563
564 if (!*str || !len)
565 return 0;
566
567 if (len <= 8 && !strncmp(str, "compound", len))
568 return BITS_COMPOUND;
569
570 for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
571 if (!page_flag_names[i])
572 continue;
573 if (!strncmp(str, page_flag_names[i] + 2, len))
574 return 1ULL << i;
575 }
576
577 return parse_number(str);
578}
579
580uint64_t parse_flag_names(const char *str, int all)
581{
582 const char *p = str;
583 uint64_t flags = 0;
584
585 while (1) {
586 if (*p == ',' || *p == '=' || *p == '\0') {
587 if ((*str != '~') || (*str == '~' && all && *++str))
588 flags |= parse_flag_name(str, p - str);
589 if (*p != ',')
590 break;
591 str = p + 1;
592 }
593 p++;
594 }
595
596 return flags;
597}
598
599void parse_bits_mask(const char *optarg)
600{
601 uint64_t mask;
602 uint64_t bits;
603 const char *p;
604
605 p = strchr(optarg, '=');
606 if (p == optarg) {
607 mask = KPF_ALL_BITS;
608 bits = parse_flag_names(p + 1, 0);
609 } else if (p) {
610 mask = parse_flag_names(optarg, 0);
611 bits = parse_flag_names(p + 1, 0);
612 } else if (strchr(optarg, '~')) {
613 mask = parse_flag_names(optarg, 1);
614 bits = parse_flag_names(optarg, 0);
615 } else {
616 mask = parse_flag_names(optarg, 0);
617 bits = KPF_ALL_BITS;
618 }
619
620 add_bits_filter(mask, bits);
621}
622
623
624struct option opts[] = {
625 { "raw" , 0, NULL, 'r' },
626 { "pid" , 1, NULL, 'p' },
627 { "file" , 1, NULL, 'f' },
628 { "addr" , 1, NULL, 'a' },
629 { "bits" , 1, NULL, 'b' },
630 { "list" , 0, NULL, 'l' },
631 { "list-each" , 0, NULL, 'L' },
632 { "no-summary", 0, NULL, 'N' },
633 { "help" , 0, NULL, 'h' },
634 { NULL , 0, NULL, 0 }
635};
636
637int main(int argc, char *argv[])
638{
639 int c;
640
641 page_size = getpagesize();
642
643 while ((c = getopt_long(argc, argv,
644 "rp:f:a:b:lLNh", opts, NULL)) != -1) {
645 switch (c) {
646 case 'r':
647 opt_raw = 1;
648 break;
649 case 'p':
650 parse_pid(optarg);
651 break;
652 case 'f':
653 parse_file(optarg);
654 break;
655 case 'a':
656 parse_addr_range(optarg);
657 break;
658 case 'b':
659 parse_bits_mask(optarg);
660 break;
661 case 'l':
662 opt_list = 1;
663 break;
664 case 'L':
665 opt_list = 2;
666 break;
667 case 'N':
668 opt_no_summary = 1;
669 break;
670 case 'h':
671 usage();
672 exit(0);
673 default:
674 usage();
675 exit(1);
676 }
677 }
678
679 if (opt_list == 1)
680 printf("offset\tcount\tflags\n");
681 if (opt_list == 2)
682 printf("offset\tflags\n");
683
684 walk_addr_ranges();
685
686 if (opt_list == 1)
687 show_page_range(0, 0); /* drain the buffer */
688
689 if (opt_no_summary)
690 return 0;
691
692 if (opt_list)
693 printf("\n\n");
694
695 show_summary();
696
697 return 0;
698}
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
index d5fdfd34bbaf..6513fe2d90b8 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration
@@ -37,7 +37,8 @@ locations.
37 37
38Larger installations usually partition the system using cpusets into 38Larger installations usually partition the system using cpusets into
39sections of nodes. Paul Jackson has equipped cpusets with the ability to 39sections of nodes. Paul Jackson has equipped cpusets with the ability to
40move pages when a task is moved to another cpuset (See ../cpusets.txt). 40move pages when a task is moved to another cpuset (See
41Documentation/cgroups/cpusets.txt).
41Cpusets allows the automation of process locality. If a task is moved to 42Cpusets allows the automation of process locality. If a task is moved to
42a new cpuset then also all its pages are moved with it so that the 43a new cpuset then also all its pages are moved with it so that the
43performance of the process does not sink dramatically. Also the pages 44performance of the process does not sink dramatically. Also the pages
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index ce72c0fe6177..600a304a828c 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -12,9 +12,9 @@ There are three components to pagemap:
12 value for each virtual page, containing the following data (from 12 value for each virtual page, containing the following data (from
13 fs/proc/task_mmu.c, above pagemap_read): 13 fs/proc/task_mmu.c, above pagemap_read):
14 14
15 * Bits 0-55 page frame number (PFN) if present 15 * Bits 0-54 page frame number (PFN) if present
16 * Bits 0-4 swap type if swapped 16 * Bits 0-4 swap type if swapped
17 * Bits 5-55 swap offset if swapped 17 * Bits 5-54 swap offset if swapped
18 * Bits 55-60 page shift (page size = 1<<page shift) 18 * Bits 55-60 page shift (page size = 1<<page shift)
19 * Bit 61 reserved for future use 19 * Bit 61 reserved for future use
20 * Bit 62 page swapped 20 * Bit 62 page swapped
@@ -36,7 +36,7 @@ There are three components to pagemap:
36 * /proc/kpageflags. This file contains a 64-bit set of flags for each 36 * /proc/kpageflags. This file contains a 64-bit set of flags for each
37 page, indexed by PFN. 37 page, indexed by PFN.
38 38
39 The flags are (from fs/proc/proc_misc, above kpageflags_read): 39 The flags are (from fs/proc/page.c, above kpageflags_read):
40 40
41 0. LOCKED 41 0. LOCKED
42 1. ERROR 42 1. ERROR
@@ -49,6 +49,68 @@ There are three components to pagemap:
49 8. WRITEBACK 49 8. WRITEBACK
50 9. RECLAIM 50 9. RECLAIM
51 10. BUDDY 51 10. BUDDY
52 11. MMAP
53 12. ANON
54 13. SWAPCACHE
55 14. SWAPBACKED
56 15. COMPOUND_HEAD
57 16. COMPOUND_TAIL
58 16. HUGE
59 18. UNEVICTABLE
60 20. NOPAGE
61
62Short descriptions to the page flags:
63
64 0. LOCKED
65 page is being locked for exclusive access, eg. by undergoing read/write IO
66
67 7. SLAB
68 page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator
69 When compound page is used, SLUB/SLQB will only set this flag on the head
70 page; SLOB will not flag it at all.
71
7210. BUDDY
73 a free memory block managed by the buddy system allocator
74 The buddy system organizes free memory in blocks of various orders.
75 An order N block has 2^N physically contiguous pages, with the BUDDY flag
76 set for and _only_ for the first page.
77
7815. COMPOUND_HEAD
7916. COMPOUND_TAIL
80 A compound page with order N consists of 2^N physically contiguous pages.
81 A compound page with order 2 takes the form of "HTTT", where H donates its
82 head page and T donates its tail page(s). The major consumers of compound
83 pages are hugeTLB pages (Documentation/vm/hugetlbpage.txt), the SLUB etc.
84 memory allocators and various device drivers. However in this interface,
85 only huge/giga pages are made visible to end users.
8617. HUGE
87 this is an integral part of a HugeTLB page
88
8920. NOPAGE
90 no page frame exists at the requested address
91
92 [IO related page flags]
93 1. ERROR IO error occurred
94 3. UPTODATE page has up-to-date data
95 ie. for file backed page: (in-memory data revision >= on-disk one)
96 4. DIRTY page has been written to, hence contains new data
97 ie. for file backed page: (in-memory data revision > on-disk one)
98 8. WRITEBACK page is being synced to disk
99
100 [LRU related page flags]
101 5. LRU page is in one of the LRU lists
102 6. ACTIVE page is in the active LRU list
10318. UNEVICTABLE page is in the unevictable (non-)LRU list
104 It is somehow pinned and not a candidate for LRU page reclaims,
105 eg. ramfs pages, shmctl(SHM_LOCK) and mlock() memory segments
106 2. REFERENCED page has been referenced since last LRU list enqueue/requeue
107 9. RECLAIM page will be reclaimed soon after its pageout IO completed
10811. MMAP a memory mapped page
10912. ANON a memory mapped page that is not part of a file
11013. SWAPCACHE page is mapped to swap space, ie. has an associated swap entry
11114. SWAPBACKED page is backed by swap/RAM
112
113The page-types tool in this directory can be used to query the above flags.
52 114
53Using pagemap to do something useful: 115Using pagemap to do something useful:
54 116
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index 0706a7282a8c..2d70d0d95108 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -1,588 +1,691 @@
1 1 ==============================
2This document describes the Linux memory management "Unevictable LRU" 2 UNEVICTABLE LRU INFRASTRUCTURE
3infrastructure and the use of this infrastructure to manage several types 3 ==============================
4of "unevictable" pages. The document attempts to provide the overall 4
5rationale behind this mechanism and the rationale for some of the design 5========
6decisions that drove the implementation. The latter design rationale is 6CONTENTS
7discussed in the context of an implementation description. Admittedly, one 7========
8can obtain the implementation details--the "what does it do?"--by reading the 8
9code. One hopes that the descriptions below add value by provide the answer 9 (*) The Unevictable LRU
10to "why does it do that?". 10
11 11 - The unevictable page list.
12Unevictable LRU Infrastructure: 12 - Memory control group interaction.
13 13 - Marking address spaces unevictable.
14The Unevictable LRU adds an additional LRU list to track unevictable pages 14 - Detecting Unevictable Pages.
15and to hide these pages from vmscan. This mechanism is based on a patch by 15 - vmscan's handling of unevictable pages.
16Larry Woodman of Red Hat to address several scalability problems with page 16
17 (*) mlock()'d pages.
18
19 - History.
20 - Basic management.
21 - mlock()/mlockall() system call handling.
22 - Filtering special vmas.
23 - munlock()/munlockall() system call handling.
24 - Migrating mlocked pages.
25 - mmap(MAP_LOCKED) system call handling.
26 - munmap()/exit()/exec() system call handling.
27 - try_to_unmap().
28 - try_to_munlock() reverse map scan.
29 - Page reclaim in shrink_*_list().
30
31
32============
33INTRODUCTION
34============
35
36This document describes the Linux memory manager's "Unevictable LRU"
37infrastructure and the use of this to manage several types of "unevictable"
38pages.
39
40The document attempts to provide the overall rationale behind this mechanism
41and the rationale for some of the design decisions that drove the
42implementation. The latter design rationale is discussed in the context of an
43implementation description. Admittedly, one can obtain the implementation
44details - the "what does it do?" - by reading the code. One hopes that the
45descriptions below add value by provide the answer to "why does it do that?".
46
47
48===================
49THE UNEVICTABLE LRU
50===================
51
52The Unevictable LRU facility adds an additional LRU list to track unevictable
53pages and to hide these pages from vmscan. This mechanism is based on a patch
54by Larry Woodman of Red Hat to address several scalability problems with page
17reclaim in Linux. The problems have been observed at customer sites on large 55reclaim in Linux. The problems have been observed at customer sites on large
18memory x86_64 systems. For example, a non-numal x86_64 platform with 128GB 56memory x86_64 systems.
19of main memory will have over 32 million 4k pages in a single zone. When a 57
20large fraction of these pages are not evictable for any reason [see below], 58To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
21vmscan will spend a lot of time scanning the LRU lists looking for the small 59main memory will have over 32 million 4k pages in a single zone. When a large
22fraction of pages that are evictable. This can result in a situation where 60fraction of these pages are not evictable for any reason [see below], vmscan
23all cpus are spending 100% of their time in vmscan for hours or days on end, 61will spend a lot of time scanning the LRU lists looking for the small fraction
24with the system completely unresponsive. 62of pages that are evictable. This can result in a situation where all CPUs are
25 63spending 100% of their time in vmscan for hours or days on end, with the system
26The Unevictable LRU infrastructure addresses the following classes of 64completely unresponsive.
27unevictable pages: 65
28 66The unevictable list addresses the following classes of unevictable pages:
29+ page owned by ramfs 67
30+ page mapped into SHM_LOCKed shared memory regions 68 (*) Those owned by ramfs.
31+ page mapped into VM_LOCKED [mlock()ed] vmas 69
32 70 (*) Those mapped into SHM_LOCK'd shared memory regions.
33The infrastructure might be able to handle other conditions that make pages 71
72 (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
73
74The infrastructure may also be able to handle other conditions that make pages
34unevictable, either by definition or by circumstance, in the future. 75unevictable, either by definition or by circumstance, in the future.
35 76
36 77
37The Unevictable LRU List 78THE UNEVICTABLE PAGE LIST
79-------------------------
38 80
39The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list 81The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
40called the "unevictable" list and an associated page flag, PG_unevictable, to 82called the "unevictable" list and an associated page flag, PG_unevictable, to
41indicate that the page is being managed on the unevictable list. The 83indicate that the page is being managed on the unevictable list.
42PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active 84
43flag in that it indicates on which LRU list a page resides when PG_lru is set. 85The PG_unevictable flag is analogous to, and mutually exclusive with, the
44The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU 86PG_active flag in that it indicates on which LRU list a page resides when
45Kconfig option. 87PG_lru is set. The unevictable list is compile-time configurable based on the
88UNEVICTABLE_LRU Kconfig option.
46 89
47The Unevictable LRU infrastructure maintains unevictable pages on an additional 90The Unevictable LRU infrastructure maintains unevictable pages on an additional
48LRU list for a few reasons: 91LRU list for a few reasons:
49 92
501) We get to "treat unevictable pages just like we treat other pages in the 93 (1) We get to "treat unevictable pages just like we treat other pages in the
51 system, which means we get to use the same code to manipulate them, the 94 system - which means we get to use the same code to manipulate them, the
52 same code to isolate them (for migrate, etc.), the same code to keep track 95 same code to isolate them (for migrate, etc.), the same code to keep track
53 of the statistics, etc..." [Rik van Riel] 96 of the statistics, etc..." [Rik van Riel]
97
98 (2) We want to be able to migrate unevictable pages between nodes for memory
99 defragmentation, workload management and memory hotplug. The linux kernel
100 can only migrate pages that it can successfully isolate from the LRU
101 lists. If we were to maintain pages elsewhere than on an LRU-like list,
102 where they can be found by isolate_lru_page(), we would prevent their
103 migration, unless we reworked migration code to find the unevictable pages
104 itself.
54 105
552) We want to be able to migrate unevictable pages between nodes--for memory
56 defragmentation, workload management and memory hotplug. The linux kernel
57 can only migrate pages that it can successfully isolate from the lru lists.
58 If we were to maintain pages elsewise than on an lru-like list, where they
59 can be found by isolate_lru_page(), we would prevent their migration, unless
60 we reworked migration code to find the unevictable pages.
61 106
107The unevictable list does not differentiate between file-backed and anonymous,
108swap-backed pages. This differentiation is only important while the pages are,
109in fact, evictable.
62 110
63The unevictable LRU list does not differentiate between file backed and swap 111The unevictable list benefits from the "arrayification" of the per-zone LRU
64backed [anon] pages. This differentiation is only important while the pages 112lists and statistics originally proposed and posted by Christoph Lameter.
65are, in fact, evictable.
66 113
67The unevictable LRU list benefits from the "arrayification" of the per-zone 114The unevictable list does not use the LRU pagevec mechanism. Rather,
68LRU lists and statistics originally proposed and posted by Christoph Lameter. 115unevictable pages are placed directly on the page's zone's unevictable list
116under the zone lru_lock. This allows us to prevent the stranding of pages on
117the unevictable list when one task has the page isolated from the LRU and other
118tasks are changing the "evictability" state of the page.
69 119
70The unevictable list does not use the lru pagevec mechanism. Rather,
71unevictable pages are placed directly on the page's zone's unevictable
72list under the zone lru_lock. The reason for this is to prevent stranding
73of pages on the unevictable list when one task has the page isolated from the
74lru and other tasks are changing the "evictability" state of the page.
75 120
121MEMORY CONTROL GROUP INTERACTION
122--------------------------------
76 123
77Unevictable LRU and Memory Controller Interaction 124The unevictable LRU facility interacts with the memory control group [aka
125memory controller; see Documentation/cgroups/memory.txt] by extending the
126lru_list enum.
127
128The memory controller data structure automatically gets a per-zone unevictable
129list as a result of the "arrayification" of the per-zone LRU lists (one per
130lru_list enum element). The memory controller tracks the movement of pages to
131and from the unevictable list.
78 132
79The memory controller data structure automatically gets a per zone unevictable
80lru list as a result of the "arrayification" of the per-zone LRU lists. The
81memory controller tracks the movement of pages to and from the unevictable list.
82When a memory control group comes under memory pressure, the controller will 133When a memory control group comes under memory pressure, the controller will
83not attempt to reclaim pages on the unevictable list. This has a couple of 134not attempt to reclaim pages on the unevictable list. This has a couple of
84effects. Because the pages are "hidden" from reclaim on the unevictable list, 135effects:
85the reclaim process can be more efficient, dealing only with pages that have 136
86a chance of being reclaimed. On the other hand, if too many of the pages 137 (1) Because the pages are "hidden" from reclaim on the unevictable list, the
87charged to the control group are unevictable, the evictable portion of the 138 reclaim process can be more efficient, dealing only with pages that have a
88working set of the tasks in the control group may not fit into the available 139 chance of being reclaimed.
89memory. This can cause the control group to thrash or to oom-kill tasks. 140
90 141 (2) On the other hand, if too many of the pages charged to the control group
91 142 are unevictable, the evictable portion of the working set of the tasks in
92Unevictable LRU: Detecting Unevictable Pages 143 the control group may not fit into the available memory. This can cause
93 144 the control group to thrash or to OOM-kill tasks.
94The function page_evictable(page, vma) in vmscan.c determines whether a 145
95page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions, 146
96page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's 147MARKING ADDRESS SPACES UNEVICTABLE
97address space using a wrapper function. Wrapper functions are used to set, 148----------------------------------
98clear and test the flag to reduce the requirement for #ifdef's throughout the 149
99source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created. 150For facilities such as ramfs none of the pages attached to the address space
100This flag remains for the life of the inode. 151may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE
101 152address space flag is provided, and this can be manipulated by a filesystem
102For shared memory regions, AS_UNEVICTABLE is set when an application 153using a number of wrapper functions:
103successfully SHM_LOCKs the region and is removed when the region is 154
104SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page 155 (*) void mapping_set_unevictable(struct address_space *mapping);
105tables for the region as does, for example, mlock(). So, we make no special 156
106effort to push any pages in the SHM_LOCKed region to the unevictable list. 157 Mark the address space as being completely unevictable.
107Vmscan will do this when/if it encounters the pages during reclaim. On 158
108SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the 159 (*) void mapping_clear_unevictable(struct address_space *mapping);
109unevictable list if no other condition keeps them unevictable. If a SHM_LOCKed 160
110region is destroyed, the pages are also "rescued" from the unevictable list in 161 Mark the address space as being evictable.
111the process of freeing them. 162
112 163 (*) int mapping_unevictable(struct address_space *mapping);
113page_evictable() detects mlock()ed pages by testing an additional page flag, 164
114PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a 165 Query the address space, and return true if it is completely
115non-NULL vma is supplied, page_evictable() will check whether the vma is 166 unevictable.
167
168These are currently used in two places in the kernel:
169
170 (1) By ramfs to mark the address spaces of its inodes when they are created,
171 and this mark remains for the life of the inode.
172
173 (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
174
175 Note that SHM_LOCK is not required to page in the locked pages if they're
176 swapped out; the application must touch the pages manually if it wants to
177 ensure they're in memory.
178
179
180DETECTING UNEVICTABLE PAGES
181---------------------------
182
183The function page_evictable() in vmscan.c determines whether a page is
184evictable or not using the query function outlined above [see section "Marking
185address spaces unevictable"] to check the AS_UNEVICTABLE flag.
186
187For address spaces that are so marked after being populated (as SHM regions
188might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
189the page tables for the region as does, for example, mlock(), nor need it make
190any special effort to push any pages in the SHM_LOCK'd area to the unevictable
191list. Instead, vmscan will do this if and when it encounters the pages during
192a reclamation scan.
193
194On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan
195the pages in the region and "rescue" them from the unevictable list if no other
196condition is keeping them unevictable. If an unevictable region is destroyed,
197the pages are also "rescued" from the unevictable list in the process of
198freeing them.
199
200page_evictable() also checks for mlocked pages by testing an additional page
201flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked,
202and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is
116VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and 203VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and
117update the appropriate statistics if the vma is VM_LOCKED. This method allows 204update the appropriate statistics if the vma is VM_LOCKED. This method allows
118efficient "culling" of pages in the fault path that are being faulted in to 205efficient "culling" of pages in the fault path that are being faulted in to
119VM_LOCKED vmas. 206VM_LOCKED VMAs.
120 207
121 208
122Unevictable Pages and Vmscan [shrink_*_list()] 209VMSCAN'S HANDLING OF UNEVICTABLE PAGES
210--------------------------------------
123 211
124If unevictable pages are culled in the fault path, or moved to the unevictable 212If unevictable pages are culled in the fault path, or moved to the unevictable
125list at mlock() or mmap() time, vmscan will never encounter the pages until 213list at mlock() or mmap() time, vmscan will not encounter the pages until they
126they have become evictable again, for example, via munlock() and have been 214have become evictable again (via munlock() for example) and have been "rescued"
127"rescued" from the unevictable list. However, there may be situations where we 215from the unevictable list. However, there may be situations where we decide,
128decide, for the sake of expediency, to leave a unevictable page on one of the 216for the sake of expediency, to leave a unevictable page on one of the regular
129regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for 217active/inactive LRU lists for vmscan to deal with. vmscan checks for such
130such pages in all of the shrink_{active|inactive|page}_list() functions and 218pages in all of the shrink_{active|inactive|page}_list() functions and will
131will "cull" such pages that it encounters--that is, it diverts those pages to 219"cull" such pages that it encounters: that is, it diverts those pages to the
132the unevictable list for the zone being scanned. 220unevictable list for the zone being scanned.
133 221
134There may be situations where a page is mapped into a VM_LOCKED vma, but the 222There may be situations where a page is mapped into a VM_LOCKED VMA, but the
135page is not marked as PageMlocked. Such pages will make it all the way to 223page is not marked as PG_mlocked. Such pages will make it all the way to
136shrink_page_list() where they will be detected when vmscan walks the reverse 224shrink_page_list() where they will be detected when vmscan walks the reverse
137map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() 225map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
138will cull the page at that point. 226shrink_page_list() will cull the page at that point.
139 227
140To "cull" an unevictable page, vmscan simply puts the page back on the lru 228To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
141list using putback_lru_page()--the inverse operation to isolate_lru_page()-- 229using putback_lru_page() - the inverse operation to isolate_lru_page() - after
142after dropping the page lock. Because the condition which makes the page 230dropping the page lock. Because the condition which makes the page unevictable
143unevictable may change once the page is unlocked, putback_lru_page() will 231may change once the page is unlocked, putback_lru_page() will recheck the
144recheck the unevictable state of a page that it places on the unevictable lru 232unevictable state of a page that it places on the unevictable list. If the
145list. If the page has become unevictable, putback_lru_page() removes it from 233page has become unevictable, putback_lru_page() removes it from the list and
146the list and retries, including the page_unevictable() test. Because such a 234retries, including the page_unevictable() test. Because such a race is a rare
147race is a rare event and movement of pages onto the unevictable list should be 235event and movement of pages onto the unevictable list should be rare, these
148rare, these extra evictabilty checks should not occur in the majority of calls 236extra evictabilty checks should not occur in the majority of calls to
149to putback_lru_page(). 237putback_lru_page().
150 238
151 239
152Mlocked Page: Prior Work 240=============
241MLOCKED PAGES
242=============
153 243
154The "Unevictable Mlocked Pages" infrastructure is based on work originally 244The unevictable page list is also useful for mlock(), in addition to ramfs and
245SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
246NOMMU situations, all mappings are effectively mlocked.
247
248
249HISTORY
250-------
251
252The "Unevictable mlocked Pages" infrastructure is based on work originally
155posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU". 253posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
156Nick posted his patch as an alternative to a patch posted by Christoph 254Nick posted his patch as an alternative to a patch posted by Christoph Lameter
157Lameter to achieve the same objective--hiding mlocked pages from vmscan. 255to achieve the same objective: hiding mlocked pages from vmscan.
158In Nick's patch, he used one of the struct page lru list link fields as a count 256
159of VM_LOCKED vmas that map the page. This use of the link field for a count 257In Nick's patch, he used one of the struct page LRU list link fields as a count
160prevented the management of the pages on an LRU list. Thus, mlocked pages were 258of VM_LOCKED VMAs that map the page. This use of the link field for a count
161not migratable as isolate_lru_page() could not find them and the lru list link 259prevented the management of the pages on an LRU list, and thus mlocked pages
162field was not available to the migration subsystem. Nick resolved this by 260were not migratable as isolate_lru_page() could not find them, and the LRU list
163putting mlocked pages back on the lru list before attempting to isolate them, 261link field was not available to the migration subsystem.
164thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated 262
165with the Unevictable LRU work, the count was replaced by walking the reverse 263Nick resolved this by putting mlocked pages back on the lru list before
166map to determine whether any VM_LOCKED vmas mapped the page. More on this 264attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
167below. 265Nick's patch was integrated with the Unevictable LRU work, the count was
168 266replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
169 267mapped the page. More on this below.
170Mlocked Pages: Basic Management 268
171 269
172Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of 270BASIC MANAGEMENT
173unevictable pages. When such a page has been "noticed" by the memory 271----------------
174management subsystem, the page is marked with the PG_mlocked [PageMlocked()] 272
175flag. A PageMlocked() page will be placed on the unevictable LRU list when 273mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
176it is added to the LRU. Pages can be "noticed" by memory management in 274pages. When such a page has been "noticed" by the memory management subsystem,
177several places: 275the page is marked with the PG_mlocked flag. This can be manipulated using the
178 276PageMlocked() functions.
1791) in the mlock()/mlockall() system call handlers. 277
1802) in the mmap() system call handler when mmap()ing a region with the 278A PG_mlocked page will be placed on the unevictable list when it is added to
181 MAP_LOCKED flag, or mmap()ing a region in a task that has called 279the LRU. Such pages can be "noticed" by memory management in several places:
182 mlockall() with the MCL_FUTURE flag. Both of these conditions result 280
183 in the VM_LOCKED flag being set for the vma. 281 (1) in the mlock()/mlockall() system call handlers;
1843) in the fault path, if mlocked pages are "culled" in the fault path, 282
185 and when a VM_LOCKED stack segment is expanded. 283 (2) in the mmap() system call handler when mmapping a region with the
1864) as mentioned above, in vmscan:shrink_page_list() when attempting to 284 MAP_LOCKED flag;
187 reclaim a page in a VM_LOCKED vma via try_to_unmap(). 285
188 286 (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
189Mlocked pages become unlocked and rescued from the unevictable list when: 287 flag
190 288
1911) mapped in a range unlocked via the munlock()/munlockall() system calls. 289 (4) in the fault path, if mlocked pages are "culled" in the fault path,
1922) munmapped() out of the last VM_LOCKED vma that maps the page, including 290 and when a VM_LOCKED stack segment is expanded; or
193 unmapping at task exit. 291
1943) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file. 292 (5) as mentioned above, in vmscan:shrink_page_list() when attempting to
1954) before a page is COWed in a VM_LOCKED vma. 293 reclaim a page in a VM_LOCKED VMA via try_to_unmap()
196 294
197 295all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
198Mlocked Pages: mlock()/mlockall() System Call Handling 296already have it set.
297
298mlocked pages become unlocked and rescued from the unevictable list when:
299
300 (1) mapped in a range unlocked via the munlock()/munlockall() system calls;
301
302 (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including
303 unmapping at task exit;
304
305 (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file;
306 or
307
308 (4) before a page is COW'd in a VM_LOCKED VMA.
309
310
311mlock()/mlockall() SYSTEM CALL HANDLING
312---------------------------------------
199 313
200Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup() 314Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
201for each vma in the range specified by the call. In the case of mlockall(), 315for each VMA in the range specified by the call. In the case of mlockall(),
202this is the entire active address space of the task. Note that mlock_fixup() 316this is the entire active address space of the task. Note that mlock_fixup()
203is used for both mlock()ing and munlock()ing a range of memory. A call to 317is used for both mlocking and munlocking a range of memory. A call to mlock()
204mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED 318an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
205is treated as a no-op--mlock_fixup() simply returns. 319treated as a no-op, and mlock_fixup() simply returns.
206 320
207If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas" 321If the VMA passes some filtering as described in "Filtering Special Vmas"
208below, mlock_fixup() will attempt to merge the vma with its neighbors or split 322below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
209off a subset of the vma if the range does not cover the entire vma. Once the 323off a subset of the VMA if the range does not cover the entire VMA. Once the
210vma has been merged or split or neither, mlock_fixup() will call 324VMA has been merged or split or neither, mlock_fixup() will call
211__mlock_vma_pages_range() to fault in the pages via get_user_pages() and 325__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to
212to mark the pages as mlocked via mlock_vma_page(). 326mark the pages as mlocked via mlock_vma_page().
213 327
214Note that the vma being mlocked might be mapped with PROT_NONE. In this case, 328Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
215get_user_pages() will be unable to fault in the pages. That's OK. If pages 329get_user_pages() will be unable to fault in the pages. That's okay. If pages
216do end up getting faulted into this VM_LOCKED vma, we'll handle them in the 330do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
217fault path or in vmscan. 331fault path or in vmscan.
218 332
219Also note that a page returned by get_user_pages() could be truncated or 333Also note that a page returned by get_user_pages() could be truncated or
220migrated out from under us, while we're trying to mlock it. To detect 334migrated out from under us, while we're trying to mlock it. To detect this,
221this, __mlock_vma_pages_range() tests the page_mapping after acquiring 335__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock.
222the page lock. If the page is still associated with its mapping, we'll 336If the page is still associated with its mapping, we'll go ahead and call
223go ahead and call mlock_vma_page(). If the mapping is gone, we just 337mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
224unlock the page and move on. Worse case, this results in page mapped 338In the worst case, this will result in a page mapped in a VM_LOCKED VMA
225in a VM_LOCKED vma remaining on a normal LRU list without being 339remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
226PageMlocked(). Again, vmscan will detect and cull such pages. 340detect and cull such pages.
227 341
228mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will 342mlock_vma_page() will call TestSetPageMlocked() for each page returned by
229TestSetPageMlocked() for each page returned by get_user_pages(). We use 343get_user_pages(). We use TestSetPageMlocked() because the page might already
230TestSetPageMlocked() because the page might already be mlocked by another 344be mlocked by another task/VMA and we don't want to do extra work. We
231task/vma and we don't want to do extra work. We especially do not want to 345especially do not want to count an mlocked page more than once in the
232count an mlocked page more than once in the statistics. If the page was 346statistics. If the page was already mlocked, mlock_vma_page() need do nothing
233already mlocked, mlock_vma_page() is done. 347more.
234 348
235If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the 349If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
236page from the LRU, as it is likely on the appropriate active or inactive list 350page from the LRU, as it is likely on the appropriate active or inactive list
237at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will 351at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
238putback the page--putback_lru_page()--which will notice that the page is now 352back the page - by calling putback_lru_page() - which will notice that the page
239mlocked and divert the page to the zone's unevictable LRU list. If 353is now mlocked and divert the page to the zone's unevictable list. If
240mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle 354mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
241it later if/when it attempts to reclaim the page. 355it later if and when it attempts to reclaim the page.
242 356
243 357
244Mlocked Pages: Filtering Special Vmas 358FILTERING SPECIAL VMAS
359----------------------
245 360
246mlock_fixup() filters several classes of "special" vmas: 361mlock_fixup() filters several classes of "special" VMAs:
247 362
2481) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind 3631) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind
249 these mappings are inherently pinned, so we don't need to mark them as 364 these mappings are inherently pinned, so we don't need to mark them as
250 mlocked. In any case, most of the pages have no struct page in which to 365 mlocked. In any case, most of the pages have no struct page in which to so
251 so mark the page. Because of this, get_user_pages() will fail for these 366 mark the page. Because of this, get_user_pages() will fail for these VMAs,
252 vmas, so there is no sense in attempting to visit them. 367 so there is no sense in attempting to visit them.
253 368
2542) vmas mapping hugetlbfs page are already effectively pinned into memory. 3692) VMAs mapping hugetlbfs page are already effectively pinned into memory. We
255 We don't need nor want to mlock() these pages. However, to preserve the 370 neither need nor want to mlock() these pages. However, to preserve the
256 prior behavior of mlock()--before the unevictable/mlock changes-- 371 prior behavior of mlock() - before the unevictable/mlock changes -
257 mlock_fixup() will call make_pages_present() in the hugetlbfs vma range 372 mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
258 to allocate the huge pages and populate the ptes. 373 allocate the huge pages and populate the ptes.
259 374
2603) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of 3753) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
261 kernel pages, such as the vdso page, relay channel pages, etc. These pages 376 kernel pages, such as the VDSO page, relay channel pages, etc. These pages
262 are inherently unevictable and are not managed on the LRU lists. 377 are inherently unevictable and are not managed on the LRU lists.
263 mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls 378 mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls
264 make_pages_present() to populate the ptes. 379 make_pages_present() to populate the ptes.
265 380
266Note that for all of these special vmas, mlock_fixup() does not set the 381Note that for all of these special VMAs, mlock_fixup() does not set the
267VM_LOCKED flag. Therefore, we won't have to deal with them later during 382VM_LOCKED flag. Therefore, we won't have to deal with them later during
268munlock() or munmap()--for example, at task exit. Neither does mlock_fixup() 383munlock(), munmap() or task exit. Neither does mlock_fixup() account these
269account these vmas against the task's "locked_vm". 384VMAs against the task's "locked_vm".
270 385
271Mlocked Pages: Downgrading the Mmap Semaphore. 386
272 387munlock()/munlockall() SYSTEM CALL HANDLING
273mlock_fixup() must be called with the mmap semaphore held for write, because 388-------------------------------------------
274it may have to merge or split vmas. However, mlocking a large region of 389
275memory can take a long time--especially if vmscan must reclaim pages to 390The munlock() and munlockall() system calls are handled by the same functions -
276satisfy the regions requirements. Faulting in a large region with the mmap 391do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
277semaphore held for write can hold off other faults on the address space, in 392lock operation indicated by an argument. So, these system calls are also
278the case of a multi-threaded task. It can also hold off scans of the task's 393handled by mlock_fixup(). Again, if called for an already munlocked VMA,
279address space via /proc. While testing under heavy load, it was observed that 394mlock_fixup() simply returns. Because of the VMA filtering discussed above,
280the ps(1) command could be held off for many minutes while a large segment was 395VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
281mlock()ed down.
282
283To address this issue, and to make the system more responsive during mlock()ing
284of large segments, mlock_fixup() downgrades the mmap semaphore to read mode
285during the call to __mlock_vma_pages_range(). This works fine. However, the
286callers of mlock_fixup() expect the semaphore to be returned in write mode.
287So, mlock_fixup() "upgrades" the semphore to write mode. Linux does not
288support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore
289and reacquire it in write mode. In a multi-threaded task, it is possible for
290the task memory map to change while the semaphore is dropped. Therefore,
291mlock_fixup() looks up the vma at the range start address after reacquiring
292the semaphore in write mode and verifies that it still covers the original
293range. If not, mlock_fixup() returns an error [-EAGAIN]. All callers of
294mlock_fixup() have been changed to deal with this new error condition.
295
296Note: when munlocking a region, all of the pages should already be resident--
297unless we have racing threads mlocking() and munlocking() regions. So,
298unlocking should not have to wait for page allocations nor faults of any kind.
299Therefore mlock_fixup() does not downgrade the semaphore for munlock().
300
301
302Mlocked Pages: munlock()/munlockall() System Call Handling
303
304The munlock() and munlockall() system calls are handled by the same functions--
305do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock
306vs lock operation indicated by an argument. So, these system calls are also
307handled by mlock_fixup(). Again, if called for an already munlock()ed vma,
308mlock_fixup() simply returns. Because of the vma filtering discussed above,
309VM_LOCKED will not be set in any "special" vmas. So, these vmas will be
310ignored for munlock. 396ignored for munlock.
311 397
312If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off 398If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
313the specified range. The range is then munlocked via the function 399specified range. The range is then munlocked via the function
314__mlock_vma_pages_range()--the same function used to mlock a vma range-- 400__mlock_vma_pages_range() - the same function used to mlock a VMA range -
315passing a flag to indicate that munlock() is being performed. 401passing a flag to indicate that munlock() is being performed.
316 402
317Because the vma access protections could have been changed to PROT_NONE after 403Because the VMA access protections could have been changed to PROT_NONE after
318faulting in and mlocking pages, get_user_pages() was unreliable for visiting 404faulting in and mlocking pages, get_user_pages() was unreliable for visiting
319these pages for munlocking. Because we don't want to leave pages mlocked(), 405these pages for munlocking. Because we don't want to leave pages mlocked,
320get_user_pages() was enhanced to accept a flag to ignore the permissions when 406get_user_pages() was enhanced to accept a flag to ignore the permissions when
321fetching the pages--all of which should be resident as a result of previous 407fetching the pages - all of which should be resident as a result of previous
322mlock()ing. 408mlocking.
323 409
324For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling 410For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling
325munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked 411munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
326flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page() 412flag using TestClearPageMlocked(). As with mlock_vma_page(),
327use the Test*PageMlocked() function to handle the case where the page might 413munlock_vma_page() use the Test*PageMlocked() function to handle the case where
328have already been unlocked by another task. If the page was mlocked, 414the page might have already been unlocked by another task. If the page was
329munlock_vma_page() updates that zone statistics for the number of mlocked 415mlocked, munlock_vma_page() updates that zone statistics for the number of
330pages. Note, however, that at this point we haven't checked whether the page 416mlocked pages. Note, however, that at this point we haven't checked whether
331is mapped by other VM_LOCKED vmas. 417the page is mapped by other VM_LOCKED VMAs.
332 418
333We can't call try_to_munlock(), the function that walks the reverse map to check 419We can't call try_to_munlock(), the function that walks the reverse map to
334for other VM_LOCKED vmas, without first isolating the page from the LRU. 420check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
335try_to_munlock() is a variant of try_to_unmap() and thus requires that the page 421try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
336not be on an lru list. [More on these below.] However, the call to 422not be on an LRU list [more on these below]. However, the call to
337isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). 423isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So,
338So, we go ahead and clear PG_mlocked up front, as this might be the only chance 424we go ahead and clear PG_mlocked up front, as this might be the only chance we
339we have. If we can successfully isolate the page, we go ahead and 425have. If we can successfully isolate the page, we go ahead and
340try_to_munlock(), which will restore the PG_mlocked flag and update the zone 426try_to_munlock(), which will restore the PG_mlocked flag and update the zone
341page statistics if it finds another vma holding the page mlocked. If we fail 427page statistics if it finds another VMA holding the page mlocked. If we fail
342to isolate the page, we'll have left a potentially mlocked page on the LRU. 428to isolate the page, we'll have left a potentially mlocked page on the LRU.
343This is fine, because we'll catch it later when/if vmscan tries to reclaim the 429This is fine, because we'll catch it later if and if vmscan tries to reclaim
344page. This should be relatively rare. 430the page. This should be relatively rare.
345 431
346Mlocked Pages: Migrating Them... 432
347 433MIGRATING MLOCKED PAGES
348A page that is being migrated has been isolated from the lru lists and is 434-----------------------
349held locked across unmapping of the page, updating the page's mapping 435
350[address_space] entry and copying the contents and state, until the 436A page that is being migrated has been isolated from the LRU lists and is held
351page table entry has been replaced with an entry that refers to the new 437locked across unmapping of the page, updating the page's address space entry
352page. Linux supports migration of mlocked pages and other unevictable 438and copying the contents and state, until the page table entry has been
353pages. This involves simply moving the PageMlocked and PageUnevictable states 439replaced with an entry that refers to the new page. Linux supports migration
354from the old page to the new page. 440of mlocked pages and other unevictable pages. This involves simply moving the
355 441PG_mlocked and PG_unevictable states from the old page to the new page.
356Note that page migration can race with mlocking or munlocking of the same 442
357page. This has been discussed from the mlock/munlock perspective in the 443Note that page migration can race with mlocking or munlocking of the same page.
358respective sections above. Both processes [migration, m[un]locking], hold 444This has been discussed from the mlock/munlock perspective in the respective
359the page locked. This provides the first level of synchronization. Page 445sections above. Both processes (migration and m[un]locking) hold the page
360migration zeros out the page_mapping of the old page before unlocking it, 446locked. This provides the first level of synchronization. Page migration
361so m[un]lock can skip these pages by testing the page mapping under page 447zeros out the page_mapping of the old page before unlocking it, so m[un]lock
362lock. 448can skip these pages by testing the page mapping under page lock.
363 449
364When completing page migration, we place the new and old pages back onto the 450To complete page migration, we place the new and old pages back onto the LRU
365lru after dropping the page lock. The "unneeded" page--old page on success, 451after dropping the page lock. The "unneeded" page - old page on success, new
366new page on failure--will be freed when the reference count held by the 452page on failure - will be freed when the reference count held by the migration
367migration process is released. To ensure that we don't strand pages on the 453process is released. To ensure that we don't strand pages on the unevictable
368unevictable list because of a race between munlock and migration, page 454list because of a race between munlock and migration, page migration uses the
369migration uses the putback_lru_page() function to add migrated pages back to 455putback_lru_page() function to add migrated pages back to the LRU.
370the lru. 456
371 457
372 458mmap(MAP_LOCKED) SYSTEM CALL HANDLING
373Mlocked Pages: mmap(MAP_LOCKED) System Call Handling 459-------------------------------------
374 460
375In addition the the mlock()/mlockall() system calls, an application can request 461In addition the the mlock()/mlockall() system calls, an application can request
376that a region of memory be mlocked using the MAP_LOCKED flag with the mmap() 462that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
377call. Furthermore, any mmap() call or brk() call that expands the heap by a 463call. Furthermore, any mmap() call or brk() call that expands the heap by a
378task that has previously called mlockall() with the MCL_FUTURE flag will result 464task that has previously called mlockall() with the MCL_FUTURE flag will result
379in the newly mapped memory being mlocked. Before the unevictable/mlock changes, 465in the newly mapped memory being mlocked. Before the unevictable/mlock
380the kernel simply called make_pages_present() to allocate pages and populate 466changes, the kernel simply called make_pages_present() to allocate pages and
381the page table. 467populate the page table.
382 468
383To mlock a range of memory under the unevictable/mlock infrastructure, the 469To mlock a range of memory under the unevictable/mlock infrastructure, the
384mmap() handler and task address space expansion functions call 470mmap() handler and task address space expansion functions call
385mlock_vma_pages_range() specifying the vma and the address range to mlock. 471mlock_vma_pages_range() specifying the vma and the address range to mlock.
386mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in 472mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in
387"Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will 473"Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have
388have already been set by the caller, in filtered vmas. Thus these vma's need 474already been set by the caller, in filtered VMAs. Thus these VMA's need not be
389not be visited for munlock when the region is unmapped. 475visited for munlock when the region is unmapped.
390 476
391For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to 477For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to
392fault/allocate the pages and mlock them. Again, like mlock_fixup(), 478fault/allocate the pages and mlock them. Again, like mlock_fixup(),
393mlock_vma_pages_range() downgrades the mmap semaphore to read mode before 479mlock_vma_pages_range() downgrades the mmap semaphore to read mode before
394attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore 480attempting to fault/allocate and mlock the pages and "upgrades" the semaphore
395back to write mode before returning. 481back to write mode before returning.
396 482
397The callers of mlock_vma_pages_range() will have already added the memory 483The callers of mlock_vma_pages_range() will have already added the memory range
398range to be mlocked to the task's "locked_vm". To account for filtered vmas, 484to be mlocked to the task's "locked_vm". To account for filtered VMAs,
399mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the 485mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the
400callers then subtract a non-negative return value from the task's locked_vm. 486callers then subtract a non-negative return value from the task's locked_vm. A
401A negative return value represent an error--for example, from get_user_pages() 487negative return value represent an error - for example, from get_user_pages()
402attempting to fault in a vma with PROT_NONE access. In this case, we leave 488attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
403the memory range accounted as locked_vm, as the protections could be changed 489memory range accounted as locked_vm, as the protections could be changed later
404later and pages allocated into that region. 490and pages allocated into that region.
405 491
406 492
407Mlocked Pages: munmap()/exit()/exec() System Call Handling 493munmap()/exit()/exec() SYSTEM CALL HANDLING
494-------------------------------------------
408 495
409When unmapping an mlocked region of memory, whether by an explicit call to 496When unmapping an mlocked region of memory, whether by an explicit call to
410munmap() or via an internal unmap from exit() or exec() processing, we must 497munmap() or via an internal unmap from exit() or exec() processing, we must
411munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. 498munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
412Before the unevictable/mlock changes, mlocking did not mark the pages in any 499Before the unevictable/mlock changes, mlocking did not mark the pages in any
413way, so unmapping them required no processing. 500way, so unmapping them required no processing.
414 501
415To munlock a range of memory under the unevictable/mlock infrastructure, the 502To munlock a range of memory under the unevictable/mlock infrastructure, the
416munmap() hander and task address space tear down function call 503munmap() handler and task address space call tear down function
417munlock_vma_pages_all(). The name reflects the observation that one always 504munlock_vma_pages_all(). The name reflects the observation that one always
418specifies the entire vma range when munlock()ing during unmap of a region. 505specifies the entire VMA range when munlock()ing during unmap of a region.
419Because of the vma filtering when mlocking() regions, only "normal" vmas that 506Because of the VMA filtering when mlocking() regions, only "normal" VMAs that
420actually contain mlocked pages will be passed to munlock_vma_pages_all(). 507actually contain mlocked pages will be passed to munlock_vma_pages_all().
421 508
422munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup() 509munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
423for the munlock case, calls __munlock_vma_pages_range() to walk the page table 510for the munlock case, calls __munlock_vma_pages_range() to walk the page table
424for the vma's memory range and munlock_vma_page() each resident page mapped by 511for the VMA's memory range and munlock_vma_page() each resident page mapped by
425the vma. This effectively munlocks the page, only if this is the last 512the VMA. This effectively munlocks the page, only if this is the last
426VM_LOCKED vma that maps the page. 513VM_LOCKED VMA that maps the page.
427
428 514
429Mlocked Page: try_to_unmap()
430 515
431[Note: the code changes represented by this section are really quite small 516try_to_unmap()
432compared to the text to describe what happening and why, and to discuss the 517--------------
433implications.]
434 518
435Pages can, of course, be mapped into multiple vmas. Some of these vmas may 519Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may
436have VM_LOCKED flag set. It is possible for a page mapped into one or more 520have VM_LOCKED flag set. It is possible for a page mapped into one or more
437VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one 521VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
438of the active or inactive LRU lists. This could happen if, for example, a 522of the active or inactive LRU lists. This could happen if, for example, a task
439task in the process of munlock()ing the page could not isolate the page from 523in the process of munlocking the page could not isolate the page from the LRU.
440the LRU. As a result, vmscan/shrink_page_list() might encounter such a page 524As a result, vmscan/shrink_page_list() might encounter such a page as described
441as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To 525in section "vmscan's handling of unevictable pages". To handle this situation,
442handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED 526try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
443vmas while it is walking a page's reverse map. 527map.
444 528
445try_to_unmap() is always called, by either vmscan for reclaim or for page 529try_to_unmap() is always called, by either vmscan for reclaim or for page
446migration, with the argument page locked and isolated from the LRU. BUG_ON() 530migration, with the argument page locked and isolated from the LRU. Separate
447assertions enforce this requirement. Separate functions handle anonymous and 531functions handle anonymous and mapped file pages, as these types of pages have
448mapped file pages, as these types of pages have different reverse map 532different reverse map mechanisms.
449mechanisms. 533
450 534 (*) try_to_unmap_anon()
451 try_to_unmap_anon() 535
452 536 To unmap anonymous pages, each VMA in the list anchored in the anon_vma
453To unmap anonymous pages, each vma in the list anchored in the anon_vma must be 537 must be visited - at least until a VM_LOCKED VMA is encountered. If the
454visited--at least until a VM_LOCKED vma is encountered. If the page is being 538 page is being unmapped for migration, VM_LOCKED VMAs do not stop the
455unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked 539 process because mlocked pages are migratable. However, for reclaim, if
456pages are migratable. However, for reclaim, if the page is mapped into a 540 the page is mapped into a VM_LOCKED VMA, the scan stops.
457VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap 541
458semphore of the mm_struct to which the vma belongs in read mode. If this is 542 try_to_unmap_anon() attempts to acquire in read mode the mmap semphore of
459successful, try_to_unmap() will mlock the page via mlock_vma_page()--we 543 the mm_struct to which the VMA belongs. If this is successful, it will
460wouldn't have gotten to try_to_unmap() if the page were already mlocked--and 544 mlock the page via mlock_vma_page() - we wouldn't have gotten to
461will return SWAP_MLOCK, indicating that the page is unevictable. If the 545 try_to_unmap_anon() if the page were already mlocked - and will return
462mmap semaphore cannot be acquired, we are not sure whether the page is really 546 SWAP_MLOCK, indicating that the page is unevictable.
463unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN. 547
464 548 If the mmap semaphore cannot be acquired, we are not sure whether the page
465 try_to_unmap_file() -- linear mappings 549 is really unevictable or not. In this case, try_to_unmap_anon() will
466 550 return SWAP_AGAIN.
467Unmapping of a mapped file page works the same, except that the scan visits 551
468all vmas that maps the page's index/page offset in the page's mapping's 552 (*) try_to_unmap_file() - linear mappings
469reverse map priority search tree. It must also visit each vma in the page's 553
470mapping's non-linear list, if the list is non-empty. As for anonymous pages, 554 Unmapping of a mapped file page works the same as for anonymous mappings,
471on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will 555 except that the scan visits all VMAs that map the page's index/page offset
472attempt to acquire the associated mm_struct's mmap semaphore to mlock the page, 556 in the page's mapping's reverse map priority search tree. It also visits
473returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not. 557 each VMA in the page's mapping's non-linear list, if the list is
474 558 non-empty.
475 try_to_unmap_file() -- non-linear mappings 559
476 560 As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file
477If a page's mapping contains a non-empty non-linear mapping vma list, then 561 page, try_to_unmap_file() will attempt to acquire the associated
478try_to_un{map|lock}() must also visit each vma in that list to determine 562 mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this
479whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit 563 is successful, and SWAP_AGAIN, if not.
480all vmas in the non-linear list to ensure that the pages is not/should not be 564
481mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate. 565 (*) try_to_unmap_file() - non-linear mappings
482However, there is no easy way to determine whether the page is actually mapped 566
483in a given vma--either for unmapping or testing whether the VM_LOCKED vma 567 If a page's mapping contains a non-empty non-linear mapping VMA list, then
484actually pins the page. 568 try_to_un{map|lock}() must also visit each VMA in that list to determine
485 569 whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit
486So, try_to_unmap_file() handles non-linear mappings by scanning a certain 570 all VMAs in the non-linear list to ensure that the pages is not/should not
487number of pages--a "cluster"--in each non-linear vma associated with the page's 571 be mlocked.
488mapping, for each file mapped page that vmscan tries to unmap. If this happens 572
489to unmap the page we're trying to unmap, try_to_unmap() will notice this on 573 If a VM_LOCKED VMA is found in the list, the scan could terminate.
490return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it 574 However, there is no easy way to determine whether the page is actually
491will return SWAP_AGAIN, causing vmscan to recirculate this page. We take 575 mapped in a given VMA - either for unmapping or testing whether the
492advantage of the cluster scan in try_to_unmap_cluster() as follows: 576 VM_LOCKED VMA actually pins the page.
493 577
494For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap 578 try_to_unmap_file() handles non-linear mappings by scanning a certain
495semaphore of the associated mm_struct for read without blocking. If this 579 number of pages - a "cluster" - in each non-linear VMA associated with the
496attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will 580 page's mapping, for each file mapped page that vmscan tries to unmap. If
497retain the mmap semaphore for the scan; otherwise it drops it here. Then, 581 this happens to unmap the page we're trying to unmap, try_to_unmap() will
498for each page in the cluster, if we're holding the mmap semaphore for a locked 582 notice this on return (page_mapcount(page) will be 0) and return
499vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This 583 SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to
500call is a no-op if the page is already locked, but will mlock any pages in 584 recirculate this page. We take advantage of the cluster scan in
501the non-linear mapping that happen to be unlocked. If one of the pages so 585 try_to_unmap_cluster() as follows:
502mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will 586
503return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan 587 For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the
504to cull the page, rather than recirculating it on the inactive list. Again, 588 mmap semaphore of the associated mm_struct for read without blocking.
505if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns 589
506SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but 590 If this attempt is successful and the VMA is VM_LOCKED,
507couldn't be mlocked. 591 try_to_unmap_cluster() will retain the mmap semaphore for the scan;
508 592 otherwise it drops it here.
509 593
510Mlocked pages: try_to_munlock() Reverse Map Scan 594 Then, for each page in the cluster, if we're holding the mmap semaphore
511 595 for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to
512TODO/FIXME: a better name might be page_mlocked()--analogous to the 596 mlock the page. This call is a no-op if the page is already locked,
513page_referenced() reverse map walker. 597 but will mlock any pages in the non-linear mapping that happen to be
514 598 unlocked.
515When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() 599
516System Call Handling" above--tries to munlock a page, it needs to 600 If one of the pages so mlocked is the page passed in to try_to_unmap(),
517determine whether or not the page is mapped by any VM_LOCKED vma, without 601 try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default
518actually attempting to unmap all ptes from the page. For this purpose, the 602 SWAP_AGAIN. This will allow vmscan to cull the page, rather than
519unevictable/mlock infrastructure introduced a variant of try_to_unmap() called 603 recirculating it on the inactive list.
520try_to_munlock(). 604
605 Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it
606 returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED
607 VMA, but couldn't be mlocked.
608
609
610try_to_munlock() REVERSE MAP SCAN
611---------------------------------
612
613 [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
614 page_referenced() reverse map walker.
615
616When munlock_vma_page() [see section "munlock()/munlockall() System Call
617Handling" above] tries to munlock a page, it needs to determine whether or not
618the page is mapped by any VM_LOCKED VMA without actually attempting to unmap
619all PTEs from the page. For this purpose, the unevictable/mlock infrastructure
620introduced a variant of try_to_unmap() called try_to_munlock().
521 621
522try_to_munlock() calls the same functions as try_to_unmap() for anonymous and 622try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
523mapped file pages with an additional argument specifing unlock versus unmap 623mapped file pages with an additional argument specifing unlock versus unmap
524processing. Again, these functions walk the respective reverse maps looking 624processing. Again, these functions walk the respective reverse maps looking
525for VM_LOCKED vmas. When such a vma is found for anonymous pages and file 625for VM_LOCKED VMAs. When such a VMA is found for anonymous pages and file
526pages mapped in linear VMAs, as in the try_to_unmap() case, the functions 626pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
527attempt to acquire the associated mmap semphore, mlock the page via 627attempt to acquire the associated mmap semphore, mlock the page via
528mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the 628mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
529pre-clearing of the page's PG_mlocked done by munlock_vma_page. 629pre-clearing of the page's PG_mlocked done by munlock_vma_page.
530 630
531If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap 631If try_to_unmap() is unable to acquire a VM_LOCKED VMA's associated mmap
532semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() 632semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() to
533to recycle the page on the inactive list and hope that it has better luck 633recycle the page on the inactive list and hope that it has better luck with the
534with the page next time. 634page next time.
535 635
536For file pages mapped into non-linear vmas, the try_to_munlock() logic works 636For file pages mapped into non-linear VMAs, the try_to_munlock() logic works
537slightly differently. On encountering a VM_LOCKED non-linear vma that might 637slightly differently. On encountering a VM_LOCKED non-linear VMA that might
538map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking 638map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking the
539the page. munlock_vma_page() will just leave the page unlocked and let 639page. munlock_vma_page() will just leave the page unlocked and let vmscan deal
540vmscan deal with it--the usual fallback position. 640with it - the usual fallback position.
541 641
542Note that try_to_munlock()'s reverse map walk must visit every vma in a pages' 642Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's
543reverse map to determine that a page is NOT mapped into any VM_LOCKED vma. 643reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
544However, the scan can terminate when it encounters a VM_LOCKED vma and can 644However, the scan can terminate when it encounters a VM_LOCKED VMA and can
545successfully acquire the vma's mmap semphore for read and mlock the page. 645successfully acquire the VMA's mmap semphore for read and mlock the page.
546Although try_to_munlock() can be called many [very many!] times when 646Although try_to_munlock() might be called a great many times when munlocking a
547munlock()ing a large region or tearing down a large address space that has been 647large region or tearing down a large address space that has been mlocked via
548mlocked via mlockall(), overall this is a fairly rare event. 648mlockall(), overall this is a fairly rare event.
549 649
550Mlocked Page: Page Reclaim in shrink_*_list() 650
551 651PAGE RECLAIM IN shrink_*_list()
552shrink_active_list() culls any obviously unevictable pages--i.e., 652-------------------------------
553!page_evictable(page, NULL)--diverting these to the unevictable lru 653
554list. However, shrink_active_list() only sees unevictable pages that 654shrink_active_list() culls any obviously unevictable pages - i.e.
555made it onto the active/inactive lru lists. Note that these pages do not 655!page_evictable(page, NULL) - diverting these to the unevictable list.
556have PageUnevictable set--otherwise, they would be on the unevictable list and 656However, shrink_active_list() only sees unevictable pages that made it onto the
557shrink_active_list would never see them. 657active/inactive lru lists. Note that these pages do not have PageUnevictable
658set - otherwise they would be on the unevictable list and shrink_active_list
659would never see them.
558 660
559Some examples of these unevictable pages on the LRU lists are: 661Some examples of these unevictable pages on the LRU lists are:
560 662
5611) ramfs pages that have been placed on the lru lists when first allocated. 663 (1) ramfs pages that have been placed on the LRU lists when first allocated.
664
665 (2) SHM_LOCK'd shared memory pages. shmctl(SHM_LOCK) does not attempt to
666 allocate or fault in the pages in the shared memory region. This happens
667 when an application accesses the page the first time after SHM_LOCK'ing
668 the segment.
562 669
5632) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to 670 (3) mlocked pages that could not be isolated from the LRU and moved to the
564 allocate or fault in the pages in the shared memory region. This happens 671 unevictable list in mlock_vma_page().
565 when an application accesses the page the first time after SHM_LOCKing
566 the segment.
567 672
5683) Mlocked pages that could not be isolated from the lru and moved to the 673 (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't
569 unevictable list in mlock_vma_page(). 674 acquire the VMA's mmap semaphore to test the flags and set PageMlocked.
675 munlock_vma_page() was forced to let the page back on to the normal LRU
676 list for vmscan to handle.
570 677
5713) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't 678shrink_inactive_list() also diverts any unevictable pages that it finds on the
572 acquire the vma's mmap semaphore to test the flags and set PageMlocked. 679inactive lists to the appropriate zone's unevictable list.
573 munlock_vma_page() was forced to let the page back on to the normal
574 LRU list for vmscan to handle.
575 680
576shrink_inactive_list() also culls any unevictable pages that it finds on 681shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
577the inactive lists, again diverting them to the appropriate zone's unevictable 682after shrink_active_list() had moved them to the inactive list, or pages mapped
578lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became 683into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
579SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or 684recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter,
580pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from 685but will pass on to shrink_page_list().
581the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice
582the latter, but will pass on to shrink_page_list().
583 686
584shrink_page_list() again culls obviously unevictable pages that it could 687shrink_page_list() again culls obviously unevictable pages that it could
585encounter for similar reason to shrink_inactive_list(). Pages mapped into 688encounter for similar reason to shrink_inactive_list(). Pages mapped into
586VM_LOCKED vmas but without PG_mlocked set will make it all the way to 689VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
587try_to_unmap(). shrink_page_list() will divert them to the unevictable list 690try_to_unmap(). shrink_page_list() will divert them to the unevictable list
588when try_to_unmap() returns SWAP_MLOCK, as discussed above. 691when try_to_unmap() returns SWAP_MLOCK, as discussed above.
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
new file mode 100644
index 000000000000..127839e53043
--- /dev/null
+++ b/Documentation/watchdog/hpwdt.txt
@@ -0,0 +1,84 @@
1Last reviewed: 06/02/2009
2
3 HP iLO2 NMI Watchdog Driver
4 NMI sourcing for iLO2 based ProLiant Servers
5 Documentation and Driver by
6 Thomas Mingarelli <thomas.mingarelli@hp.com>
7
8 The HP iLO2 NMI Watchdog driver is a kernel module that provides basic
9 watchdog functionality and the added benefit of NMI sourcing. Both the
10 watchdog functionality and the NMI sourcing capability need to be enabled
11 by the user. Remember that the two modes are not dependant on one another.
12 A user can have the NMI sourcing without the watchdog timer and vice-versa.
13
14 Watchdog functionality is enabled like any other common watchdog driver. That
15 is, an application needs to be started that kicks off the watchdog timer. A
16 basic application exists in the Documentation/watchdog/src directory called
17 watchdog-test.c. Simply compile the C file and kick it off. If the system
18 gets into a bad state and hangs, the HP ProLiant iLO 2 timer register will
19 not be updated in a timely fashion and a hardware system reset (also known as
20 an Automatic Server Recovery (ASR)) event will occur.
21
22 The hpwdt driver also has three (3) module parameters. They are the following:
23
24 soft_margin - allows the user to set the watchdog timer value
25 allow_kdump - allows the user to save off a kernel dump image after an NMI
26 nowayout - basic watchdog parameter that does not allow the timer to
27 be restarted or an impending ASR to be escaped.
28
29 NOTE: More information about watchdog drivers in general, including the ioctl
30 interface to /dev/watchdog can be found in
31 Documentation/watchdog/watchdog-api.txt and Documentation/IPMI.txt.
32
33 The NMI sourcing capability is disabled when the driver discovers that the
34 nmi_watchdog is turned on (nmi_watchdog = 1). This is due to the inability to
35 distinguish between "NMI Watchdog Ticks" and "HW generated NMI events" in the
36 Linux kernel. What this means is that the hpwdt nmi handler code is called
37 each time the NMI signal fires off. This could amount to several thousands of
38 NMIs in a matter of seconds. If a user sees the Linux kernel's "dazed and
39 confused" message in the logs or if the system gets into a hung state, then
40 the user should reboot with nmi_watchdog=0.
41
42 1. If the kernel has not been booted with nmi_watchdog turned off then
43 edit /boot/grub/menu.lst and place the nmi_watchdog=0 at the end of the
44 currently booting kernel line.
45 2. reboot the sever
46
47 Now, the hpwdt can successfully receive and source the NMI and provide a log
48 message that details the reason for the NMI (as determined by the HP BIOS).
49
50 Below is a list of NMIs the HP BIOS understands along with the associated
51 code (reason):
52
53 No source found 00h
54
55 Uncorrectable Memory Error 01h
56
57 ASR NMI 1Bh
58
59 PCI Parity Error 20h
60
61 NMI Button Press 27h
62
63 SB_BUS_NMI 28h
64
65 ILO Doorbell NMI 29h
66
67 ILO IOP NMI 2Ah
68
69 ILO Watchdog NMI 2Bh
70
71 Proc Throt NMI 2Ch
72
73 Front Side Bus NMI 2Dh
74
75 PCI Express Error 2Fh
76
77 DMA controller NMI 30h
78
79 Hypertransport/CSI Error 31h
80
81
82
83 -- Tom Mingarelli
84 (thomas.mingarelli@hp.com)
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7b4596ac4120..8da3a795083f 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,6 +50,10 @@ Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical 50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
51 pointer to single linked list of struct setup_data. 51 pointer to single linked list of struct setup_data.
52 52
53Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
54 beyond the kernel_alignment added, new init_size and
55 pref_address fields. Added extended boot loader IDs.
56
53**** MEMORY LAYOUT 57**** MEMORY LAYOUT
54 58
55The traditional memory map for the kernel loader, used for Image or 59The traditional memory map for the kernel loader, used for Image or
@@ -158,7 +162,7 @@ Offset Proto Name Meaning
1580202/4 2.00+ header Magic signature "HdrS" 1620202/4 2.00+ header Magic signature "HdrS"
1590206/2 2.00+ version Boot protocol version supported 1630206/2 2.00+ version Boot protocol version supported
1600208/4 2.00+ realmode_swtch Boot loader hook (see below) 1640208/4 2.00+ realmode_swtch Boot loader hook (see below)
161020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) 165020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
162020E/2 2.00+ kernel_version Pointer to kernel version string 166020E/2 2.00+ kernel_version Pointer to kernel version string
1630210/1 2.00+ type_of_loader Boot loader identifier 1670210/1 2.00+ type_of_loader Boot loader identifier
1640211/1 2.00+ loadflags Boot protocol option flags 1680211/1 2.00+ loadflags Boot protocol option flags
@@ -168,12 +172,14 @@ Offset Proto Name Meaning
168021C/4 2.00+ ramdisk_size initrd size (set by boot loader) 172021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
1690220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only 1730220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
1700224/2 2.01+ heap_end_ptr Free memory after setup end 1740224/2 2.01+ heap_end_ptr Free memory after setup end
1710226/2 N/A pad1 Unused 1750226/1 2.02+(3 ext_loader_ver Extended boot loader version
1760227/1 2.02+(3 ext_loader_type Extended boot loader ID
1720228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 1770228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
173022C/4 2.03+ initrd_addr_max Highest legal initrd address 178022C/4 2.03+ ramdisk_max Highest legal initrd address
1740230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 1790230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
1750234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 1800234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
1760235/3 N/A pad2 Unused 1810235/1 2.10+ min_alignment Minimum alignment, as a power of two
1820236/2 N/A pad3 Unused
1770238/4 2.06+ cmdline_size Maximum size of the kernel command line 1830238/4 2.06+ cmdline_size Maximum size of the kernel command line
178023C/4 2.07+ hardware_subarch Hardware subarchitecture 184023C/4 2.07+ hardware_subarch Hardware subarchitecture
1790240/8 2.07+ hardware_subarch_data Subarchitecture-specific data 1850240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
@@ -181,6 +187,8 @@ Offset Proto Name Meaning
181024C/4 2.08+ payload_length Length of kernel payload 187024C/4 2.08+ payload_length Length of kernel payload
1820250/8 2.09+ setup_data 64-bit physical pointer to linked list 1880250/8 2.09+ setup_data 64-bit physical pointer to linked list
183 of struct setup_data 189 of struct setup_data
1900258/8 2.10+ pref_address Preferred loading address
1910260/4 2.10+ init_size Linear memory required during initialization
184 192
185(1) For backwards compatibility, if the setup_sects field contains 0, the 193(1) For backwards compatibility, if the setup_sects field contains 0, the
186 real value is 4. 194 real value is 4.
@@ -189,6 +197,8 @@ Offset Proto Name Meaning
189 field are unusable, which means the size of a bzImage kernel 197 field are unusable, which means the size of a bzImage kernel
190 cannot be determined. 198 cannot be determined.
191 199
200(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
201
192If the "HdrS" (0x53726448) magic number is not found at offset 0x202, 202If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
193the boot protocol version is "old". Loading an old kernel, the 203the boot protocol version is "old". Loading an old kernel, the
194following parameters should be assumed: 204following parameters should be assumed:
@@ -299,14 +309,14 @@ Protocol: 2.00+
299 e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version 309 e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
300 10.17. 310 10.17.
301 311
302Field name: readmode_swtch 312Field name: realmode_swtch
303Type: modify (optional) 313Type: modify (optional)
304Offset/size: 0x208/4 314Offset/size: 0x208/4
305Protocol: 2.00+ 315Protocol: 2.00+
306 316
307 Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) 317 Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
308 318
309Field name: start_sys 319Field name: start_sys_seg
310Type: read 320Type: read
311Offset/size: 0x20c/2 321Offset/size: 0x20c/2
312Protocol: 2.00+ 322Protocol: 2.00+
@@ -342,18 +352,32 @@ Protocol: 2.00+
342 0xTV here, where T is an identifier for the boot loader and V is 352 0xTV here, where T is an identifier for the boot loader and V is
343 a version number. Otherwise, enter 0xFF here. 353 a version number. Otherwise, enter 0xFF here.
344 354
355 For boot loader IDs above T = 0xD, write T = 0xE to this field and
356 write the extended ID minus 0x10 to the ext_loader_type field.
357 Similarly, the ext_loader_ver field can be used to provide more than
358 four bits for the bootloader version.
359
360 For example, for T = 0x15, V = 0x234, write:
361
362 type_of_loader <- 0xE4
363 ext_loader_type <- 0x05
364 ext_loader_ver <- 0x23
365
345 Assigned boot loader ids: 366 Assigned boot loader ids:
346 0 LILO (0x00 reserved for pre-2.00 bootloader) 367 0 LILO (0x00 reserved for pre-2.00 bootloader)
347 1 Loadlin 368 1 Loadlin
348 2 bootsect-loader (0x20, all other values reserved) 369 2 bootsect-loader (0x20, all other values reserved)
349 3 SYSLINUX 370 3 Syslinux
350 4 EtherBoot 371 4 Etherboot/gPXE
351 5 ELILO 372 5 ELILO
352 7 GRUB 373 7 GRUB
353 8 U-BOOT 374 8 U-Boot
354 9 Xen 375 9 Xen
355 A Gujin 376 A Gujin
356 B Qemu 377 B Qemu
378 C Arcturus Networks uCbootloader
379 E Extended (see ext_loader_type)
380 F Special (0xFF = undefined)
357 381
358 Please contact <hpa@zytor.com> if you need a bootloader ID 382 Please contact <hpa@zytor.com> if you need a bootloader ID
359 value assigned. 383 value assigned.
@@ -452,6 +476,35 @@ Protocol: 2.01+
452 Set this field to the offset (from the beginning of the real-mode 476 Set this field to the offset (from the beginning of the real-mode
453 code) of the end of the setup stack/heap, minus 0x0200. 477 code) of the end of the setup stack/heap, minus 0x0200.
454 478
479Field name: ext_loader_ver
480Type: write (optional)
481Offset/size: 0x226/1
482Protocol: 2.02+
483
484 This field is used as an extension of the version number in the
485 type_of_loader field. The total version number is considered to be
486 (type_of_loader & 0x0f) + (ext_loader_ver << 4).
487
488 The use of this field is boot loader specific. If not written, it
489 is zero.
490
491 Kernels prior to 2.6.31 did not recognize this field, but it is safe
492 to write for protocol version 2.02 or higher.
493
494Field name: ext_loader_type
495Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
496Offset/size: 0x227/1
497Protocol: 2.02+
498
499 This field is used as an extension of the type number in
500 type_of_loader field. If the type in type_of_loader is 0xE, then
501 the actual type is (ext_loader_type + 0x10).
502
503 This field is ignored if the type in type_of_loader is not 0xE.
504
505 Kernels prior to 2.6.31 did not recognize this field, but it is safe
506 to write for protocol version 2.02 or higher.
507
455Field name: cmd_line_ptr 508Field name: cmd_line_ptr
456Type: write (obligatory) 509Type: write (obligatory)
457Offset/size: 0x228/4 510Offset/size: 0x228/4
@@ -468,7 +521,7 @@ Protocol: 2.02+
468 zero, the kernel will assume that your boot loader does not support 521 zero, the kernel will assume that your boot loader does not support
469 the 2.02+ protocol. 522 the 2.02+ protocol.
470 523
471Field name: initrd_addr_max 524Field name: ramdisk_max
472Type: read 525Type: read
473Offset/size: 0x22c/4 526Offset/size: 0x22c/4
474Protocol: 2.03+ 527Protocol: 2.03+
@@ -481,11 +534,19 @@ Protocol: 2.03+
481 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) 534 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
482 535
483Field name: kernel_alignment 536Field name: kernel_alignment
484Type: read (reloc) 537Type: read/modify (reloc)
485Offset/size: 0x230/4 538Offset/size: 0x230/4
486Protocol: 2.05+ 539Protocol: 2.05+ (read), 2.10+ (modify)
540
541 Alignment unit required by the kernel (if relocatable_kernel is
542 true.) A relocatable kernel that is loaded at an alignment
543 incompatible with the value in this field will be realigned during
544 kernel initialization.
487 545
488 Alignment unit required by the kernel (if relocatable_kernel is true.) 546 Starting with protocol version 2.10, this reflects the kernel
547 alignment preferred for optimal performance; it is possible for the
548 loader to modify this field to permit a lesser alignment. See the
549 min_alignment and pref_address field below.
489 550
490Field name: relocatable_kernel 551Field name: relocatable_kernel
491Type: read (reloc) 552Type: read (reloc)
@@ -497,6 +558,22 @@ Protocol: 2.05+
497 After loading, the boot loader must set the code32_start field to 558 After loading, the boot loader must set the code32_start field to
498 point to the loaded code, or to a boot loader hook. 559 point to the loaded code, or to a boot loader hook.
499 560
561Field name: min_alignment
562Type: read (reloc)
563Offset/size: 0x235/1
564Protocol: 2.10+
565
566 This field, if nonzero, indicates as a power of two the minimum
567 alignment required, as opposed to preferred, by the kernel to boot.
568 If a boot loader makes use of this field, it should update the
569 kernel_alignment field with the alignment unit desired; typically:
570
571 kernel_alignment = 1 << min_alignment
572
573 There may be a considerable performance cost with an excessively
574 misaligned kernel. Therefore, a loader should typically try each
575 power-of-two alignment from kernel_alignment down to this alignment.
576
500Field name: cmdline_size 577Field name: cmdline_size
501Type: read 578Type: read
502Offset/size: 0x238/4 579Offset/size: 0x238/4
@@ -542,7 +619,10 @@ Protocol: 2.08+
542 619
543 The payload may be compressed. The format of both the compressed and 620 The payload may be compressed. The format of both the compressed and
544 uncompressed data should be determined using the standard magic 621 uncompressed data should be determined using the standard magic
545 numbers. Currently only gzip compressed ELF is used. 622 numbers. The currently supported compression formats are gzip
623 (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A) and LZMA
624 (magic number 5D 00). The uncompressed payload is currently always ELF
625 (magic number 7F 45 4C 46).
546 626
547Field name: payload_length 627Field name: payload_length
548Type: read 628Type: read
@@ -578,6 +658,36 @@ Protocol: 2.09+
578 sure to consider the case where the linked list already contains 658 sure to consider the case where the linked list already contains
579 entries. 659 entries.
580 660
661Field name: pref_address
662Type: read (reloc)
663Offset/size: 0x258/8
664Protocol: 2.10+
665
666 This field, if nonzero, represents a preferred load address for the
667 kernel. A relocating bootloader should attempt to load at this
668 address if possible.
669
670 A non-relocatable kernel will unconditionally move itself and to run
671 at this address.
672
673Field name: init_size
674Type: read
675Offset/size: 0x25c/4
676
677 This field indicates the amount of linear contiguous memory starting
678 at the kernel runtime start address that the kernel needs before it
679 is capable of examining its memory map. This is not the same thing
680 as the total amount of memory the kernel needs to boot, but it can
681 be used by a relocating boot loader to help select a safe load
682 address for the kernel.
683
684 The kernel runtime start address is determined by the following algorithm:
685
686 if (relocatable_kernel)
687 runtime_start = align_up(load_address, kernel_alignment)
688 else
689 runtime_start = pref_address
690
581 691
582**** THE IMAGE CHECKSUM 692**** THE IMAGE CHECKSUM
583 693
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
new file mode 100644
index 000000000000..607b1a016064
--- /dev/null
+++ b/Documentation/x86/earlyprintk.txt
@@ -0,0 +1,101 @@
1
2Mini-HOWTO for using the earlyprintk=dbgp boot option with a
3USB2 Debug port key and a debug cable, on x86 systems.
4
5You need two computers, the 'USB debug key' special gadget and
6and two USB cables, connected like this:
7
8 [host/target] <-------> [USB debug key] <-------> [client/console]
9
101. There are three specific hardware requirements:
11
12 a.) Host/target system needs to have USB debug port capability.
13
14 You can check this capability by looking at a 'Debug port' bit in
15 the lspci -vvv output:
16
17 # lspci -vvv
18 ...
19 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
20 Subsystem: Lenovo ThinkPad T61
21 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
22 Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
23 Latency: 0
24 Interrupt: pin D routed to IRQ 19
25 Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
26 Capabilities: [50] Power Management version 2
27 Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
28 Status: D0 PME-Enable- DSel=0 DScale=0 PME+
29 Capabilities: [58] Debug port: BAR=1 offset=00a0
30 ^^^^^^^^^^^ <==================== [ HERE ]
31 Kernel driver in use: ehci_hcd
32 Kernel modules: ehci-hcd
33 ...
34
35( If your system does not list a debug port capability then you probably
36 wont be able to use the USB debug key. )
37
38 b.) You also need a Netchip USB debug cable/key:
39
40 http://www.plxtech.com/products/NET2000/NET20DC/default.asp
41
42 This is a small blue plastic connector with two USB connections,
43 it draws power from its USB connections.
44
45 c.) Thirdly, you need a second client/console system with a regular USB port.
46
472. Software requirements:
48
49 a.) On the host/target system:
50
51 You need to enable the following kernel config option:
52
53 CONFIG_EARLY_PRINTK_DBGP=y
54
55 And you need to add the boot command line: "earlyprintk=dbgp".
56 (If you are using Grub, append it to the 'kernel' line in
57 /etc/grub.conf)
58
59 NOTE: normally earlyprintk console gets turned off once the
60 regular console is alive - use "earlyprintk=dbgp,keep" to keep
61 this channel open beyond early bootup. This can be useful for
62 debugging crashes under Xorg, etc.
63
64 b.) On the client/console system:
65
66 You should enable the following kernel config option:
67
68 CONFIG_USB_SERIAL_DEBUG=y
69
70 On the next bootup with the modified kernel you should
71 get a /dev/ttyUSBx device(s).
72
73 Now this channel of kernel messages is ready to be used: start
74 your favorite terminal emulator (minicom, etc.) and set
75 it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to
76 see the raw output.
77
78 c.) On Nvidia Southbridge based systems: the kernel will try to probe
79 and find out which port has debug device connected.
80
813. Testing that it works fine:
82
83 You can test the output by using earlyprintk=dbgp,keep and provoking
84 kernel messages on the host/target system. You can provoke a harmless
85 kernel message by for example doing:
86
87 echo h > /proc/sysrq-trigger
88
89 On the host/target system you should see this help line in "dmesg" output:
90
91 SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
92
93 On the client/console system do:
94
95 cat /dev/ttyUSB0
96
97 And you should see the help line above displayed shortly after you've
98 provoked it on the host system.
99
100If it does not work then please ask about it on the linux-kernel@vger.kernel.org
101mailing list or contact the x86 maintainers.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c13040a718..29a6ff8bc7d3 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.
5 5
6Machine check 6Machine check
7 7
8 mce=off disable machine check 8 Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
9 mce=bootlog Enable logging of machine checks left over from booting. 9
10 Disabled by default on AMD because some BIOS leave bogus ones. 10 mce=off
11 If your BIOS doesn't do that it's a good idea to enable though 11 Disable machine check
12 to make sure you log even machine check events that result 12 mce=no_cmci
13 in a reboot. On Intel systems it is enabled by default. 13 Disable CMCI(Corrected Machine Check Interrupt) that
14 Intel processor supports. Usually this disablement is
15 not recommended, but it might be handy if your hardware
16 is misbehaving.
17 Note that you'll get more problems without CMCI than with
18 due to the shared banks, i.e. you might get duplicated
19 error logs.
20 mce=dont_log_ce
21 Don't make logs for corrected errors. All events reported
22 as corrected are silently cleared by OS.
23 This option will be useful if you have no interest in any
24 of corrected errors.
25 mce=ignore_ce
26 Disable features for corrected errors, e.g. polling timer
27 and CMCI. All events reported as corrected are not cleared
28 by OS and remained in its error banks.
29 Usually this disablement is not recommended, however if
30 there is an agent checking/clearing corrected errors
31 (e.g. BIOS or hardware monitoring applications), conflicting
32 with OS's error handling, and you cannot deactivate the agent,
33 then this option will be a help.
34 mce=bootlog
35 Enable logging of machine checks left over from booting.
36 Disabled by default on AMD because some BIOS leave bogus ones.
37 If your BIOS doesn't do that it's a good idea to enable though
38 to make sure you log even machine check events that result
39 in a reboot. On Intel systems it is enabled by default.
14 mce=nobootlog 40 mce=nobootlog
15 Disable boot machine check logging. 41 Disable boot machine check logging.
16 mce=tolerancelevel (number) 42 mce=tolerancelevel[,monarchtimeout] (number,number)
43 tolerance levels:
17 0: always panic on uncorrected errors, log corrected errors 44 0: always panic on uncorrected errors, log corrected errors
18 1: panic or SIGBUS on uncorrected errors, log corrected errors 45 1: panic or SIGBUS on uncorrected errors, log corrected errors
19 2: SIGBUS or log uncorrected errors, log corrected errors 46 2: SIGBUS or log uncorrected errors, log corrected errors
20 3: never panic or SIGBUS, log all errors (for testing only) 47 3: never panic or SIGBUS, log all errors (for testing only)
21 Default is 1 48 Default is 1
22 Can be also set using sysfs which is preferable. 49 Can be also set using sysfs which is preferable.
50 monarchtimeout:
51 Sets the time in us to wait for other CPUs on machine checks. 0
52 to disable.
23 53
24 nomce (for compatibility with i386): same as mce=off 54 nomce (for compatibility with i386): same as mce=off
25 55
@@ -150,11 +180,6 @@ NUMA
150 Otherwise, the remaining system RAM is allocated to an 180 Otherwise, the remaining system RAM is allocated to an
151 additional node. 181 additional node.
152 182
153 numa=hotadd=percent
154 Only allow hotadd memory to preallocate page structures upto
155 percent of already available memory.
156 numa=hotadd=0 will disable hotadd memory.
157
158ACPI 183ACPI
159 184
160 acpi=off Don't enable ACPI 185 acpi=off Don't enable ACPI
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets
index 33bb56655991..0f11d9becb0b 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets
@@ -7,7 +7,8 @@ you can create fake NUMA nodes that represent contiguous chunks of memory and
7assign them to cpusets and their attached tasks. This is a way of limiting the 7assign them to cpusets and their attached tasks. This is a way of limiting the
8amount of system memory that are available to a certain class of tasks. 8amount of system memory that are available to a certain class of tasks.
9 9
10For more information on the features of cpusets, see Documentation/cpusets.txt. 10For more information on the features of cpusets, see
11Documentation/cgroups/cpusets.txt.
11There are a number of different configurations you can use for your needs. For 12There are a number of different configurations you can use for your needs. For
12more information on the numa=fake command line option and its various ways of 13more information on the numa=fake command line option and its various ways of
13configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt. 14configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
@@ -32,7 +33,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
32 On node 3 totalpages: 131072 33 On node 3 totalpages: 131072
33 34
34Now following the instructions for mounting the cpusets filesystem from 35Now following the instructions for mounting the cpusets filesystem from
35Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory 36Documentation/cgroups/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
36address spaces) to individual cpusets: 37address spaces) to individual cpusets:
37 38
38 [root@xroads /]# mkdir exampleset 39 [root@xroads /]# mkdir exampleset
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e7b159..b1fb30273286 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -41,7 +41,9 @@ check_interval
41 the polling interval. When the poller stops finding MCEs, it 41 the polling interval. When the poller stops finding MCEs, it
42 triggers an exponential backoff (poll less often) on the polling 42 triggers an exponential backoff (poll less often) on the polling
43 interval. The check_interval variable is both the initial and 43 interval. The check_interval variable is both the initial and
44 maximum polling interval. 44 maximum polling interval. 0 means no polling for corrected machine
45 check errors (but some corrected errors might be still reported
46 in other ways)
45 47
46tolerant 48tolerant
47 Tolerance level. When a machine check exception occurs for a non 49 Tolerance level. When a machine check exception occurs for a non
@@ -67,6 +69,10 @@ trigger
67 Program to run when a machine check event is detected. 69 Program to run when a machine check event is detected.
68 This is an alternative to running mcelog regularly from cron 70 This is an alternative to running mcelog regularly from cron
69 and allows to detect events faster. 71 and allows to detect events faster.
72monarch_timeout
73 How long to wait for the other CPUs to machine check too on a
74 exception. 0 to disable waiting for other CPUs.
75 Unit: us
70 76
71TBD document entries for AMD threshold interrupt configuration 77TBD document entries for AMD threshold interrupt configuration
72 78
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 29b52b14d0b4..d6498e3cd713 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables:
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
7hole caused by [48:63] sign extension 7hole caused by [48:63] sign extension
8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole 8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
9ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory 9ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
10ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole 10ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
11ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space 11ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
12ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) 12ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
13... unused hole ... 14... unused hole ...
14ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 15ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
15ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space 16ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space