aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-block59
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-cciss33
-rw-r--r--Documentation/ABI/testing/sysfs-devices-cache_disable18
-rw-r--r--Documentation/Changes19
-rw-r--r--Documentation/CodingStyle4
-rw-r--r--Documentation/DMA-API.txt16
-rw-r--r--Documentation/DocBook/Makefile3
-rw-r--r--Documentation/DocBook/mac80211.tmpl1
-rw-r--r--Documentation/DocBook/tracepoint.tmpl89
-rw-r--r--Documentation/RCU/rculist_nulls.txt2
-rw-r--r--Documentation/RCU/trace.txt102
-rw-r--r--Documentation/SM501.txt2
-rw-r--r--Documentation/Smack.txt20
-rw-r--r--Documentation/SubmittingPatches82
-rw-r--r--Documentation/arm/Samsung-S3C24XX/GPIO.txt10
-rw-r--r--Documentation/block/biodoc.txt2
-rw-r--r--Documentation/block/deadline-iosched.txt2
-rw-r--r--Documentation/braille-console.txt2
-rw-r--r--Documentation/dell_rbu.txt4
-rw-r--r--Documentation/development-process/5.Posting31
-rw-r--r--Documentation/driver-model/devres.txt2
-rw-r--r--Documentation/edac.txt8
-rw-r--r--Documentation/fb/sh7760fb.txt2
-rw-r--r--Documentation/feature-removal-schedule.txt17
-rw-r--r--Documentation/filesystems/autofs4-mount-control.txt2
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt2
-rw-r--r--Documentation/filesystems/debugfs.txt158
-rw-r--r--Documentation/filesystems/ext4.txt6
-rw-r--r--Documentation/filesystems/fiemap.txt2
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt2
-rw-r--r--Documentation/filesystems/gfs2.txt19
-rw-r--r--Documentation/filesystems/nfs-rdma.txt2
-rw-r--r--Documentation/filesystems/nilfs2.txt5
-rw-r--r--Documentation/filesystems/proc.txt4
-rw-r--r--Documentation/filesystems/sysfs-pci.txt2
-rw-r--r--Documentation/filesystems/vfat.txt8
-rw-r--r--Documentation/futex-requeue-pi.txt131
-rw-r--r--Documentation/gpio.txt2
-rw-r--r--Documentation/hwmon/f71882fg12
-rw-r--r--Documentation/hwmon/ibmaem2
-rw-r--r--Documentation/hwmon/sysfs-interface19
-rw-r--r--Documentation/hwmon/tmp40142
-rw-r--r--Documentation/hwmon/w83627ehf11
-rw-r--r--Documentation/i2c/busses/i2c-ocores17
-rw-r--r--Documentation/i2c/busses/i2c-viapro4
-rw-r--r--Documentation/ide/ide.txt2
-rw-r--r--Documentation/isdn/00-INDEX29
-rw-r--r--Documentation/isdn/INTERFACE.CAPI94
-rw-r--r--Documentation/isdn/README.gigaset42
-rw-r--r--Documentation/kbuild/kconfig.txt116
-rw-r--r--Documentation/kbuild/modules.txt2
-rw-r--r--Documentation/kdump/kdump.txt4
-rw-r--r--Documentation/kernel-parameters.txt78
-rw-r--r--Documentation/kmemleak.txt142
-rw-r--r--Documentation/kobject.txt2
-rw-r--r--Documentation/laptops/acer-wmi.txt2
-rw-r--r--Documentation/laptops/sony-laptop.txt2
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt2
-rw-r--r--Documentation/lguest/Makefile3
-rw-r--r--Documentation/lguest/lguest.c1008
-rw-r--r--Documentation/lguest/lguest.txt1
-rw-r--r--Documentation/local_ops.txt2
-rw-r--r--Documentation/memory-barriers.txt129
-rw-r--r--Documentation/memory-hotplug.txt8
-rw-r--r--Documentation/mn10300/ABI.txt2
-rw-r--r--Documentation/mtd/nand_ecc.txt12
-rw-r--r--Documentation/networking/bonding.txt6
-rw-r--r--Documentation/networking/can.txt237
-rw-r--r--Documentation/networking/dm9000.txt2
-rw-r--r--Documentation/networking/ieee802154.txt76
-rw-r--r--Documentation/networking/ip-sysctl.txt18
-rw-r--r--Documentation/networking/ipv6.txt37
-rw-r--r--Documentation/networking/l2tp.txt2
-rw-r--r--Documentation/networking/mac80211-injection.txt28
-rw-r--r--Documentation/networking/netdevices.txt2
-rw-r--r--Documentation/networking/operstates.txt3
-rw-r--r--Documentation/networking/packet_mmap.txt140
-rw-r--r--Documentation/networking/phonet.txt2
-rw-r--r--Documentation/networking/regulatory.txt2
-rw-r--r--Documentation/power/devices.txt34
-rw-r--r--Documentation/power/regulator/consumer.txt2
-rw-r--r--Documentation/power/regulator/overview.txt2
-rw-r--r--Documentation/power/s2ram.txt2
-rw-r--r--Documentation/power/userland-swsusp.txt2
-rw-r--r--Documentation/powerpc/booting-without-of.txt4
-rw-r--r--Documentation/powerpc/dts-bindings/can/sja1000.txt53
-rw-r--r--Documentation/powerpc/dts-bindings/ecm.txt64
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/board.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt3
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/esdhc.txt5
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/mcm.txt64
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/msi-pic.txt2
-rw-r--r--Documentation/powerpc/dts-bindings/fsl/pmc.txt4
-rw-r--r--Documentation/powerpc/qe_firmware.txt2
-rw-r--r--Documentation/rbtree.txt10
-rw-r--r--Documentation/rfkill.txt607
-rw-r--r--Documentation/s390/Debugging390.txt4
-rw-r--r--Documentation/scheduler/sched-nice-design.txt2
-rw-r--r--Documentation/scheduler/sched-rt-group.txt20
-rw-r--r--Documentation/scsi/aic79xx.txt2
-rw-r--r--Documentation/scsi/ncr53c8xx.txt4
-rw-r--r--Documentation/scsi/sym53c8xx_2.txt2
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt38
-rw-r--r--Documentation/sound/alsa/HD-Audio-Models.txt18
-rw-r--r--Documentation/sound/alsa/HD-Audio.txt2
-rw-r--r--Documentation/sound/alsa/Procfile.txt36
-rw-r--r--Documentation/sound/alsa/README.maya44163
-rw-r--r--Documentation/sound/alsa/hda_codec.txt2
-rw-r--r--Documentation/sound/alsa/soc/dapm.txt1
-rw-r--r--Documentation/sysctl/kernel.txt11
-rw-r--r--Documentation/sysctl/vm.txt4
-rw-r--r--Documentation/timers/hpet.txt2
-rw-r--r--Documentation/timers/timer_stats.txt2
-rw-r--r--Documentation/trace/events.txt90
-rw-r--r--Documentation/trace/ftrace.txt19
-rw-r--r--Documentation/trace/kmemtrace.txt2
-rw-r--r--Documentation/trace/power.txt17
-rw-r--r--Documentation/usb/WUSB-Design-overview.txt8
-rw-r--r--Documentation/usb/anchors.txt4
-rw-r--r--Documentation/usb/callbacks.txt2
-rw-r--r--Documentation/video4linux/cx18.txt2
-rw-r--r--Documentation/x86/boot.txt122
-rw-r--r--Documentation/x86/x86_64/boot-options.txt49
-rw-r--r--Documentation/x86/x86_64/machinecheck8
-rw-r--r--Documentation/x86/x86_64/mm.txt9
127 files changed, 3114 insertions, 1616 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 44f52a4f5903..cbbd3e069945 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -60,3 +60,62 @@ Description:
60 Indicates whether the block layer should automatically 60 Indicates whether the block layer should automatically
61 generate checksums for write requests bound for 61 generate checksums for write requests bound for
62 devices that support receiving integrity metadata. 62 devices that support receiving integrity metadata.
63
64What: /sys/block/<disk>/alignment_offset
65Date: April 2009
66Contact: Martin K. Petersen <martin.petersen@oracle.com>
67Description:
68 Storage devices may report a physical block size that is
69 bigger than the logical block size (for instance a drive
70 with 4KB physical sectors exposing 512-byte logical
71 blocks to the operating system). This parameter
72 indicates how many bytes the beginning of the device is
73 offset from the disk's natural alignment.
74
75What: /sys/block/<disk>/<partition>/alignment_offset
76Date: April 2009
77Contact: Martin K. Petersen <martin.petersen@oracle.com>
78Description:
79 Storage devices may report a physical block size that is
80 bigger than the logical block size (for instance a drive
81 with 4KB physical sectors exposing 512-byte logical
82 blocks to the operating system). This parameter
83 indicates how many bytes the beginning of the partition
84 is offset from the disk's natural alignment.
85
86What: /sys/block/<disk>/queue/logical_block_size
87Date: May 2009
88Contact: Martin K. Petersen <martin.petersen@oracle.com>
89Description:
90 This is the smallest unit the storage device can
91 address. It is typically 512 bytes.
92
93What: /sys/block/<disk>/queue/physical_block_size
94Date: May 2009
95Contact: Martin K. Petersen <martin.petersen@oracle.com>
96Description:
97 This is the smallest unit the storage device can write
98 without resorting to read-modify-write operation. It is
99 usually the same as the logical block size but may be
100 bigger. One example is SATA drives with 4KB sectors
101 that expose a 512-byte logical block size to the
102 operating system.
103
104What: /sys/block/<disk>/queue/minimum_io_size
105Date: April 2009
106Contact: Martin K. Petersen <martin.petersen@oracle.com>
107Description:
108 Storage devices may report a preferred minimum I/O size,
109 which is the smallest request the device can perform
110 without incurring a read-modify-write penalty. For disk
111 drives this is often the physical block size. For RAID
112 arrays it is often the stripe chunk size.
113
114What: /sys/block/<disk>/queue/optimal_io_size
115Date: April 2009
116Contact: Martin K. Petersen <martin.petersen@oracle.com>
117Description:
118 Storage devices may report an optimal I/O size, which is
119 the device's preferred unit of receiving I/O. This is
120 rarely reported for disk drives. For RAID devices it is
121 usually the stripe width or the internal block size.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
new file mode 100644
index 000000000000..0a92a7c93a62
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -0,0 +1,33 @@
1Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model
2Date: March 2009
3Kernel Version: 2.6.30
4Contact: iss_storagedev@hp.com
5Description: Displays the SCSI INQUIRY page 0 model for logical drive
6 Y of controller X.
7
8Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
9Date: March 2009
10Kernel Version: 2.6.30
11Contact: iss_storagedev@hp.com
12Description: Displays the SCSI INQUIRY page 0 revision for logical
13 drive Y of controller X.
14
15Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
16Date: March 2009
17Kernel Version: 2.6.30
18Contact: iss_storagedev@hp.com
19Description: Displays the SCSI INQUIRY page 83 serial number for logical
20 drive Y of controller X.
21
22Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
23Date: March 2009
24Kernel Version: 2.6.30
25Contact: iss_storagedev@hp.com
26Description: Displays the SCSI INQUIRY page 0 vendor for logical drive
27 Y of controller X.
28
29Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
30Date: March 2009
31Kernel Version: 2.6.30
32Contact: iss_storagedev@hp.com
33Description: A symbolic link to /sys/block/cciss!cXdY
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644
index 000000000000..175bb4f70512
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable
@@ -0,0 +1,18 @@
1What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
2Date: August 2008
3KernelVersion: 2.6.27
4Contact: mark.langsdorf@amd.com
5Description: These files exist in every cpu's cache index directories.
6 There are currently 2 cache_disable_# files in each
7 directory. Reading from these files on a supported
8 processor will return that cache disable index value
9 for that processor and node. Writing to one of these
10 files will cause the specificed cache index to be disabled.
11
12 Currently, only AMD Family 10h Processors support cache index
13 disable, and only for their L3 caches. See the BIOS and
14 Kernel Developer's Guide at
15 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
16 for formatting information and other details on the
17 cache index disable.
18Users: joachim.deguara@amd.com
diff --git a/Documentation/Changes b/Documentation/Changes
index b95082be4d5e..664392481c84 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -29,7 +29,7 @@ hardware, for example, you probably needn't concern yourself with
29isdn4k-utils. 29isdn4k-utils.
30 30
31o Gnu C 3.2 # gcc --version 31o Gnu C 3.2 # gcc --version
32o Gnu make 3.79.1 # make --version 32o Gnu make 3.80 # make --version
33o binutils 2.12 # ld -v 33o binutils 2.12 # ld -v
34o util-linux 2.10o # fdformat --version 34o util-linux 2.10o # fdformat --version
35o module-init-tools 0.9.10 # depmod -V 35o module-init-tools 0.9.10 # depmod -V
@@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version
48o oprofile 0.9 # oprofiled --version 48o oprofile 0.9 # oprofiled --version
49o udev 081 # udevinfo -V 49o udev 081 # udevinfo -V
50o grub 0.93 # grub --version 50o grub 0.93 # grub --version
51o mcelog 0.6
51 52
52Kernel compilation 53Kernel compilation
53================== 54==================
@@ -61,7 +62,7 @@ computer.
61Make 62Make
62---- 63----
63 64
64You will need Gnu make 3.79.1 or later to build the kernel. 65You will need Gnu make 3.80 or later to build the kernel.
65 66
66Binutils 67Binutils
67-------- 68--------
@@ -276,6 +277,16 @@ before running exportfs or mountd. It is recommended that all NFS
276services be protected from the internet-at-large by a firewall where 277services be protected from the internet-at-large by a firewall where
277that is possible. 278that is possible.
278 279
280mcelog
281------
282
283In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
284as a regular cronjob similar to the x86-64 kernel to process and log
285machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
286events are errors reported by the CPU. Processing them is strongly encouraged.
287All x86-64 kernels since 2.6.4 require the mcelog utility to
288process machine checks.
289
279Getting updated software 290Getting updated software
280======================== 291========================
281 292
@@ -365,6 +376,10 @@ FUSE
365---- 376----
366o <http://sourceforge.net/projects/fuse> 377o <http://sourceforge.net/projects/fuse>
367 378
379mcelog
380------
381o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
382
368Networking 383Networking
369********** 384**********
370 385
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 72968cd5eaf3..8bb37237ebd2 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -698,8 +698,8 @@ very often is not. Abundant use of the inline keyword leads to a much bigger
698kernel, which in turn slows the system as a whole down, due to a bigger 698kernel, which in turn slows the system as a whole down, due to a bigger
699icache footprint for the CPU and simply because there is less memory 699icache footprint for the CPU and simply because there is less memory
700available for the pagecache. Just think about it; a pagecache miss causes a 700available for the pagecache. Just think about it; a pagecache miss causes a
701disk seek, which easily takes 5 miliseconds. There are a LOT of cpu cycles 701disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
702that can go into these 5 miliseconds. 702that can go into these 5 milliseconds.
703 703
704A reasonable rule of thumb is to not put inline at functions that have more 704A reasonable rule of thumb is to not put inline at functions that have more
705than 3 lines of code in them. An exception to this rule are the cases where 705than 3 lines of code in them. An exception to this rule are the cases where
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d9aa43d78bcc..5aceb88b3f8b 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -676,8 +676,8 @@ this directory the following files can currently be found:
676 dma-api/all_errors This file contains a numeric value. If this 676 dma-api/all_errors This file contains a numeric value. If this
677 value is not equal to zero the debugging code 677 value is not equal to zero the debugging code
678 will print a warning for every error it finds 678 will print a warning for every error it finds
679 into the kernel log. Be carefull with this 679 into the kernel log. Be careful with this
680 option. It can easily flood your logs. 680 option, as it can easily flood your logs.
681 681
682 dma-api/disabled This read-only file contains the character 'Y' 682 dma-api/disabled This read-only file contains the character 'Y'
683 if the debugging code is disabled. This can 683 if the debugging code is disabled. This can
@@ -704,12 +704,24 @@ this directory the following files can currently be found:
704 The current number of free dma_debug_entries 704 The current number of free dma_debug_entries
705 in the allocator. 705 in the allocator.
706 706
707 dma-api/driver-filter
708 You can write a name of a driver into this file
709 to limit the debug output to requests from that
710 particular driver. Write an empty string to
711 that file to disable the filter and see
712 all errors again.
713
707If you have this code compiled into your kernel it will be enabled by default. 714If you have this code compiled into your kernel it will be enabled by default.
708If you want to boot without the bookkeeping anyway you can provide 715If you want to boot without the bookkeeping anyway you can provide
709'dma_debug=off' as a boot parameter. This will disable DMA-API debugging. 716'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
710Notice that you can not enable it again at runtime. You have to reboot to do 717Notice that you can not enable it again at runtime. You have to reboot to do
711so. 718so.
712 719
720If you want to see debug messages only for a special device driver you can
721specify the dma_debug_driver=<drivername> parameter. This will enable the
722driver filter at boot time. The debug code will only print errors for that
723driver afterwards. This filter can be disabled or changed later using debugfs.
724
713When the code disables itself at runtime this is most likely because it ran 725When the code disables itself at runtime this is most likely because it ran
714out of dma_debug_entries. These entries are preallocated at boot. The number 726out of dma_debug_entries. These entries are preallocated at boot. The number
715of preallocated entries is defined per architecture. If it is too low for you 727of preallocated entries is defined per architecture. If it is too low for you
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index b1eb661e6302..9632444f6c62 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ 14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
15 mac80211.xml debugobjects.xml sh.xml regulator.xml \ 15 mac80211.xml debugobjects.xml sh.xml regulator.xml \
16 alsa-driver-api.xml writing-an-alsa-driver.xml 16 alsa-driver-api.xml writing-an-alsa-driver.xml \
17 tracepoint.xml
17 18
18### 19###
19# The build process is as follows (targets): 20# The build process is as follows (targets):
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl
index fbeaffc1dcc3..e36986663570 100644
--- a/Documentation/DocBook/mac80211.tmpl
+++ b/Documentation/DocBook/mac80211.tmpl
@@ -145,7 +145,6 @@ usage should require reading the full document.
145 interface in STA mode at first! 145 interface in STA mode at first!
146 </para> 146 </para>
147!Finclude/net/mac80211.h ieee80211_if_init_conf 147!Finclude/net/mac80211.h ieee80211_if_init_conf
148!Finclude/net/mac80211.h ieee80211_if_conf
149 </chapter> 148 </chapter>
150 149
151 <chapter id="rx-tx"> 150 <chapter id="rx-tx">
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 000000000000..b0756d0fd579
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="Tracepoints">
6 <bookinfo>
7 <title>The Linux Kernel Tracepoint API</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Jason</firstname>
12 <surname>Baron</surname>
13 <affiliation>
14 <address>
15 <email>jbaron@redhat.com</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <legalnotice>
22 <para>
23 This documentation is free software; you can redistribute
24 it and/or modify it under the terms of the GNU General Public
25 License as published by the Free Software Foundation; either
26 version 2 of the License, or (at your option) any later
27 version.
28 </para>
29
30 <para>
31 This program is distributed in the hope that it will be
32 useful, but WITHOUT ANY WARRANTY; without even the implied
33 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
34 See the GNU General Public License for more details.
35 </para>
36
37 <para>
38 You should have received a copy of the GNU General Public
39 License along with this program; if not, write to the Free
40 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
41 MA 02111-1307 USA
42 </para>
43
44 <para>
45 For more details see the file COPYING in the source
46 distribution of Linux.
47 </para>
48 </legalnotice>
49 </bookinfo>
50
51 <toc></toc>
52 <chapter id="intro">
53 <title>Introduction</title>
54 <para>
55 Tracepoints are static probe points that are located in strategic points
56 throughout the kernel. 'Probes' register/unregister with tracepoints
57 via a callback mechanism. The 'probes' are strictly typed functions that
58 are passed a unique set of parameters defined by each tracepoint.
59 </para>
60
61 <para>
62 From this simple callback mechanism, 'probes' can be used to profile, debug,
63 and understand kernel behavior. There are a number of tools that provide a
64 framework for using 'probes'. These tools include Systemtap, ftrace, and
65 LTTng.
66 </para>
67
68 <para>
69 Tracepoints are defined in a number of header files via various macros. Thus,
70 the purpose of this document is to provide a clear accounting of the available
71 tracepoints. The intention is to understand not only what tracepoints are
72 available but also to understand where future tracepoints might be added.
73 </para>
74
75 <para>
76 The API presented has functions of the form:
77 <function>trace_tracepointname(function parameters)</function>. These are the
78 tracepoints callbacks that are found throughout the code. Registering and
79 unregistering probes with these callback sites is covered in the
80 <filename>Documentation/trace/*</filename> directory.
81 </para>
82 </chapter>
83
84 <chapter id="irq">
85 <title>IRQ</title>
86!Iinclude/trace/events/irq.h
87 </chapter>
88
89</book>
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 6389dec33459..93cb28d05dcd 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -118,7 +118,7 @@ to another chain) checking the final 'nulls' value if
118the lookup met the end of chain. If final 'nulls' value 118the lookup met the end of chain. If final 'nulls' value
119is not the slot number, then we must restart the lookup at 119is not the slot number, then we must restart the lookup at
120the beginning. If the object was moved to the same chain, 120the beginning. If the object was moved to the same chain,
121then the reader doesnt care : It might eventually 121then the reader doesn't care : It might eventually
122scan the list again without harm. 122scan the list again without harm.
123 123
124 124
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 068848240a8b..02cced183b2d 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
192The output of "cat rcu/rcudata" looks as follows: 192The output of "cat rcu/rcudata" looks as follows:
193 193
194rcu: 194rcu:
195 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 195rcu:
196 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 196 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
197 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 197 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
198 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 198 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
199 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 199 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
200 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 200 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
201 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 201 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
202 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 202 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
203 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
203rcu_bh: 204rcu_bh:
204 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 205 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
205 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 206 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
206 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 207 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
207 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 208 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
208 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 209 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
209 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 210 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
210 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 211 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
211 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 212 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
212 213
213The first section lists the rcu_data structures for rcu, the second for 214The first section lists the rcu_data structures for rcu, the second for
214rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. 215rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent
253o "qp" indicates that RCU still expects a quiescent state from 254o "qp" indicates that RCU still expects a quiescent state from
254 this CPU. 255 this CPU.
255 256
256o "rpfq" is the number of rcu_pending() calls on this CPU required
257 to induce this CPU to invoke force_quiescent_state().
258
259o "rp" is low-order four hex digits of the count of how many times
260 rcu_pending() has been invoked on this CPU.
261
262o "dt" is the current value of the dyntick counter that is incremented 257o "dt" is the current value of the dyntick counter that is incremented
263 when entering or leaving dynticks idle state, either by the 258 when entering or leaving dynticks idle state, either by the
264 scheduler or by irq. The number after the "/" is the interrupt 259 scheduler or by irq. The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number
305 of RCU callbacks is ready to invoke, then the remainder will 300 of RCU callbacks is ready to invoke, then the remainder will
306 be deferred. 301 be deferred.
307 302
303There is also an rcu/rcudata.csv file with the same information in
304comma-separated-variable spreadsheet format.
305
308 306
309The output of "cat rcu/rcugp" looks as follows: 307The output of "cat rcu/rcugp" looks as follows:
310 308
@@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
411 For example, the first entry at the lowest level shows 409 For example, the first entry at the lowest level shows
412 "^0", indicating that it corresponds to bit zero in 410 "^0", indicating that it corresponds to bit zero in
413 the first entry at the middle level. 411 the first entry at the middle level.
412
413
414The output of "cat rcu/rcu_pending" looks as follows:
415
416rcu:
417 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
418 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
419 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
420 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
421 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
422 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
423 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
424 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
425rcu_bh:
426 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
427 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
428 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
429 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
430 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
431 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
432 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
433 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
434
435As always, this is once again split into "rcu" and "rcu_bh" portions.
436The fields are as follows:
437
438o "np" is the number of times that __rcu_pending() has been invoked
439 for the corresponding flavor of RCU.
440
441o "qsp" is the number of times that the RCU was waiting for a
442 quiescent state from this CPU.
443
444o "cbr" is the number of times that this CPU had RCU callbacks
445 that had passed through a grace period, and were thus ready
446 to be invoked.
447
448o "cng" is the number of times that this CPU needed another
449 grace period while RCU was idle.
450
451o "gpc" is the number of times that an old grace period had
452 completed, but this CPU was not yet aware of it.
453
454o "gps" is the number of times that a new grace period had started,
455 but this CPU was not yet aware of it.
456
457o "nf" is the number of times that this CPU suspected that the
458 current grace period had run for too long, and thus needed to
459 be forced.
460
461 Please note that "forcing" consists of sending resched IPIs
462 to holdout CPUs. If that CPU really still is in an old RCU
463 read-side critical section, then we really do have to wait for it.
464 The assumption behing "forcing" is that the CPU is not still in
465 an old RCU read-side critical section, but has not yet responded
466 for some other reason.
467
468o "nn" is the number of times that this CPU needed nothing. Alert
469 readers will note that the rcu "nn" number for a given CPU very
470 closely matches the rcu_bh "np" number for that same CPU. This
471 is due to short-circuit evaluation in rcu_pending().
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
index 6fc656035925..561826f82093 100644
--- a/Documentation/SM501.txt
+++ b/Documentation/SM501.txt
@@ -5,7 +5,7 @@ Copyright 2006, 2007 Simtec Electronics
5 5
6The Silicon Motion SM501 multimedia companion chip is a multifunction device 6The Silicon Motion SM501 multimedia companion chip is a multifunction device
7which may provide numerous interfaces including USB host controller USB gadget, 7which may provide numerous interfaces including USB host controller USB gadget,
8Asyncronous Serial ports, Audio functions and a dual display video interface. 8asynchronous serial ports, audio functions, and a dual display video interface.
9The device may be connected by PCI or local bus with varying functions enabled. 9The device may be connected by PCI or local bus with varying functions enabled.
10 10
11Core 11Core
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt
index 629c92e99783..34614b4c708e 100644
--- a/Documentation/Smack.txt
+++ b/Documentation/Smack.txt
@@ -184,8 +184,9 @@ length. Single character labels using special characters, that being anything
184other than a letter or digit, are reserved for use by the Smack development 184other than a letter or digit, are reserved for use by the Smack development
185team. Smack labels are unstructured, case sensitive, and the only operation 185team. Smack labels are unstructured, case sensitive, and the only operation
186ever performed on them is comparison for equality. Smack labels cannot 186ever performed on them is comparison for equality. Smack labels cannot
187contain unprintable characters or the "/" (slash) character. Smack labels 187contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
188cannot begin with a '-', which is reserved for special options. 188(quote) and '"' (double-quote) characters.
189Smack labels cannot begin with a '-', which is reserved for special options.
189 190
190There are some predefined labels: 191There are some predefined labels:
191 192
@@ -523,3 +524,18 @@ Smack supports some mount options:
523 524
524These mount options apply to all file system types. 525These mount options apply to all file system types.
525 526
527Smack auditing
528
529If you want Smack auditing of security events, you need to set CONFIG_AUDIT
530in your kernel configuration.
531By default, all denied events will be audited. You can change this behavior by
532writing a single character to the /smack/logging file :
5330 : no logging
5341 : log denied (default)
5352 : log accepted
5363 : log denied & accepted
537
538Events are logged as 'key=value' pairs, for each event you at least will get
539the subjet, the object, the rights requested, the action, the kernel function
540that triggered the event, plus other pairs depending on the type of event
541audited.
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index f309d3c6221c..5c555a8b39e5 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -91,6 +91,10 @@ Be as specific as possible. The WORST descriptions possible include
91things like "update driver X", "bug fix for driver X", or "this patch 91things like "update driver X", "bug fix for driver X", or "this patch
92includes updates for subsystem X. Please apply." 92includes updates for subsystem X. Please apply."
93 93
94The maintainer will thank you if you write your patch description in a
95form which can be easily pulled into Linux's source code management
96system, git, as a "commit log". See #15, below.
97
94If your description starts to get long, that's a sign that you probably 98If your description starts to get long, that's a sign that you probably
95need to split up your patch. See #3, next. 99need to split up your patch. See #3, next.
96 100
@@ -183,8 +187,9 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS
183copy the maintainer when you change their code. 187copy the maintainer when you change their code.
184 188
185For small patches you may want to CC the Trivial Patch Monkey 189For small patches you may want to CC the Trivial Patch Monkey
186trivial@kernel.org managed by Jesper Juhl; which collects "trivial" 190trivial@kernel.org which collects "trivial" patches. Have a look
187patches. Trivial patches must qualify for one of the following rules: 191into the MAINTAINERS file for its current manager.
192Trivial patches must qualify for one of the following rules:
188 Spelling fixes in documentation 193 Spelling fixes in documentation
189 Spelling fixes which could break grep(1) 194 Spelling fixes which could break grep(1)
190 Warning fixes (cluttering with useless warnings is bad) 195 Warning fixes (cluttering with useless warnings is bad)
@@ -196,7 +201,6 @@ patches. Trivial patches must qualify for one of the following rules:
196 since people copy, as long as it's trivial) 201 since people copy, as long as it's trivial)
197 Any fix by the author/maintainer of the file (ie. patch monkey 202 Any fix by the author/maintainer of the file (ie. patch monkey
198 in re-transmission mode) 203 in re-transmission mode)
199URL: <http://www.kernel.org/pub/linux/kernel/people/juhl/trivial/>
200 204
201 205
202 206
@@ -405,7 +409,14 @@ person it names. This tag documents that potentially interested parties
405have been included in the discussion 409have been included in the discussion
406 410
407 411
40814) Using Tested-by: and Reviewed-by: 41214) Using Reported-by:, Tested-by: and Reviewed-by:
413
414If this patch fixes a problem reported by somebody else, consider adding a
415Reported-by: tag to credit the reporter for their contribution. Please
416note that this tag should not be added without the reporter's permission,
417especially if the problem was not reported in a public forum. That said,
418if we diligently credit our bug reporters, they will, hopefully, be
419inspired to help us again in the future.
409 420
410A Tested-by: tag indicates that the patch has been successfully tested (in 421A Tested-by: tag indicates that the patch has been successfully tested (in
411some environment) by the person named. This tag informs maintainers that 422some environment) by the person named. This tag informs maintainers that
@@ -444,7 +455,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to
444reviewers and to inform maintainers of the degree of review which has been 455reviewers and to inform maintainers of the degree of review which has been
445done on the patch. Reviewed-by: tags, when supplied by reviewers known to 456done on the patch. Reviewed-by: tags, when supplied by reviewers known to
446understand the subject area and to perform thorough reviews, will normally 457understand the subject area and to perform thorough reviews, will normally
447increase the liklihood of your patch getting into the kernel. 458increase the likelihood of your patch getting into the kernel.
448 459
449 460
45015) The canonical patch format 46115) The canonical patch format
@@ -485,12 +496,33 @@ phrase" should not be a filename. Do not use the same "summary
485phrase" for every patch in a whole patch series (where a "patch 496phrase" for every patch in a whole patch series (where a "patch
486series" is an ordered sequence of multiple, related patches). 497series" is an ordered sequence of multiple, related patches).
487 498
488Bear in mind that the "summary phrase" of your email becomes 499Bear in mind that the "summary phrase" of your email becomes a
489a globally-unique identifier for that patch. It propagates 500globally-unique identifier for that patch. It propagates all the way
490all the way into the git changelog. The "summary phrase" may 501into the git changelog. The "summary phrase" may later be used in
491later be used in developer discussions which refer to the patch. 502developer discussions which refer to the patch. People will want to
492People will want to google for the "summary phrase" to read 503google for the "summary phrase" to read discussion regarding that
493discussion regarding that patch. 504patch. It will also be the only thing that people may quickly see
505when, two or three months later, they are going through perhaps
506thousands of patches using tools such as "gitk" or "git log
507--oneline".
508
509For these reasons, the "summary" must be no more than 70-75
510characters, and it must describe both what the patch changes, as well
511as why the patch might be necessary. It is challenging to be both
512succinct and descriptive, but that is what a well-written summary
513should do.
514
515The "summary phrase" may be prefixed by tags enclosed in square
516brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
517considered part of the summary phrase, but describe how the patch
518should be treated. Common tags might include a version descriptor if
519the multiple versions of the patch have been sent out in response to
520comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
521comments. If there are four patches in a patch series the individual
522patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
523that developers understand the order in which the patches should be
524applied and that they have reviewed or applied all of the patches in
525the patch series.
494 526
495A couple of example Subjects: 527A couple of example Subjects:
496 528
@@ -510,19 +542,31 @@ the patch author in the changelog.
510The explanation body will be committed to the permanent source 542The explanation body will be committed to the permanent source
511changelog, so should make sense to a competent reader who has long 543changelog, so should make sense to a competent reader who has long
512since forgotten the immediate details of the discussion that might 544since forgotten the immediate details of the discussion that might
513have led to this patch. 545have led to this patch. Including symptoms of the failure which the
546patch addresses (kernel log messages, oops messages, etc.) is
547especially useful for people who might be searching the commit logs
548looking for the applicable patch. If a patch fixes a compile failure,
549it may not be necessary to include _all_ of the compile failures; just
550enough that it is likely that someone searching for the patch can find
551it. As in the "summary phrase", it is important to be both succinct as
552well as descriptive.
514 553
515The "---" marker line serves the essential purpose of marking for patch 554The "---" marker line serves the essential purpose of marking for patch
516handling tools where the changelog message ends. 555handling tools where the changelog message ends.
517 556
518One good use for the additional comments after the "---" marker is for 557One good use for the additional comments after the "---" marker is for
519a diffstat, to show what files have changed, and the number of inserted 558a diffstat, to show what files have changed, and the number of
520and deleted lines per file. A diffstat is especially useful on bigger 559inserted and deleted lines per file. A diffstat is especially useful
521patches. Other comments relevant only to the moment or the maintainer, 560on bigger patches. Other comments relevant only to the moment or the
522not suitable for the permanent changelog, should also go here. 561maintainer, not suitable for the permanent changelog, should also go
523Use diffstat options "-p 1 -w 70" so that filenames are listed from the 562here. A good example of such comments might be "patch changelogs"
524top of the kernel source tree and don't use too much horizontal space 563which describe what has changed between the v1 and v2 version of the
525(easily fit in 80 columns, maybe with some indentation). 564patch.
565
566If you are going to include a diffstat after the "---" marker, please
567use diffstat options "-p 1 -w 70" so that filenames are listed from
568the top of the kernel source tree and don't use too much horizontal
569space (easily fit in 80 columns, maybe with some indentation).
526 570
527See more details on the proper patch format in the following 571See more details on the proper patch format in the following
528references. 572references.
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
index ea7ccfc4b274..948c8718d967 100644
--- a/Documentation/arm/Samsung-S3C24XX/GPIO.txt
+++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
@@ -51,7 +51,7 @@ PIN Numbers
51----------- 51-----------
52 52
53 Each pin has an unique number associated with it in regs-gpio.h, 53 Each pin has an unique number associated with it in regs-gpio.h,
54 eg S3C2410_GPA0 or S3C2410_GPF1. These defines are used to tell 54 eg S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
55 the GPIO functions which pin is to be used. 55 the GPIO functions which pin is to be used.
56 56
57 57
@@ -65,11 +65,11 @@ Configuring a pin
65 65
66 Eg: 66 Eg:
67 67
68 s3c2410_gpio_cfgpin(S3C2410_GPA0, S3C2410_GPA0_ADDR0); 68 s3c2410_gpio_cfgpin(S3C2410_GPA(0), S3C2410_GPA0_ADDR0);
69 s3c2410_gpio_cfgpin(S3C2410_GPE8, S3C2410_GPE8_SDDAT1); 69 s3c2410_gpio_cfgpin(S3C2410_GPE(8), S3C2410_GPE8_SDDAT1);
70 70
71 which would turn GPA0 into the lowest Address line A0, and set 71 which would turn GPA(0) into the lowest Address line A0, and set
72 GPE8 to be connected to the SDIO/MMC controller's SDDAT1 line. 72 GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
73 73
74 74
75Reading the current configuration 75Reading the current configuration
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97ea7e6b..8d2158a1c6aa 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
186do not have a corresponding kernel virtual address space mapping) and 186do not have a corresponding kernel virtual address space mapping) and
187low-memory pages. 187low-memory pages.
188 188
189Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion 189Note: Please refer to Documentation/DMA-mapping.txt for a discussion
190on PCI high mem DMA aspects and mapping of scatter gather lists, and support 190on PCI high mem DMA aspects and mapping of scatter gather lists, and support
191for 64 bit PCI. 191for 64 bit PCI.
192 192
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index 72576769e0f4..2d82c80322cb 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -58,7 +58,7 @@ same criteria as reads.
58front_merges (bool) 58front_merges (bool)
59------------ 59------------
60 60
61Sometimes it happens that a request enters the io scheduler that is contigious 61Sometimes it happens that a request enters the io scheduler that is contiguous
62with a request that is already on the queue. Either it fits in the back of that 62with a request that is already on the queue. Either it fits in the back of that
63request, or it fits at the front. That is called either a back merge candidate 63request, or it fits at the front. That is called either a back merge candidate
64or a front merge candidate. Due to the way files are typically laid out, 64or a front merge candidate. Due to the way files are typically laid out,
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
index 000b0fbdc105..d0d042c2fd5e 100644
--- a/Documentation/braille-console.txt
+++ b/Documentation/braille-console.txt
@@ -27,7 +27,7 @@ parameter.
27 27
28For simplicity, only one braille console can be enabled, other uses of 28For simplicity, only one braille console can be enabled, other uses of
29console=brl,... will be discarded. Also note that it does not interfere with 29console=brl,... will be discarded. Also note that it does not interfere with
30the console selection mecanism described in serial-console.txt 30the console selection mechanism described in serial-console.txt
31 31
32For now, only the VisioBraille device is supported. 32For now, only the VisioBraille device is supported.
33 33
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index c11b931f8f98..15174985ad08 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -76,9 +76,9 @@ Do the steps below to download the BIOS image.
76 76
77The /sys/class/firmware/dell_rbu/ entries will remain till the following is 77The /sys/class/firmware/dell_rbu/ entries will remain till the following is
78done. 78done.
79echo -1 > /sys/class/firmware/dell_rbu/loading. 79echo -1 > /sys/class/firmware/dell_rbu/loading
80Until this step is completed the driver cannot be unloaded. 80Until this step is completed the driver cannot be unloaded.
81Also echoing either mono ,packet or init in to image_type will free up the 81Also echoing either mono, packet or init in to image_type will free up the
82memory allocated by the driver. 82memory allocated by the driver.
83 83
84If a user by accident executes steps 1 and 3 above without executing step 2; 84If a user by accident executes steps 1 and 3 above without executing step 2;
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index dd48132a74dd..f622c1e9f0f9 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has been
119done. When done properly, though, it is time well spent. 119done. When done properly, though, it is time well spent.
120 120
121 121
1225.4: PATCH FORMATTING 1225.4: PATCH FORMATTING AND CHANGELOGS
123 123
124So now you have a perfect series of patches for posting, but the work is 124So now you have a perfect series of patches for posting, but the work is
125not done quite yet. Each patch needs to be formatted into a message which 125not done quite yet. Each patch needs to be formatted into a message which
@@ -146,8 +146,33 @@ that end, each patch will be composed of the following:
146 - One or more tag lines, with, at a minimum, one Signed-off-by: line from 146 - One or more tag lines, with, at a minimum, one Signed-off-by: line from
147 the author of the patch. Tags will be described in more detail below. 147 the author of the patch. Tags will be described in more detail below.
148 148
149The above three items should, normally, be the text used when committing 149The items above, together, form the changelog for the patch. Writing good
150the change to a revision control system. They are followed by: 150changelogs is a crucial but often-neglected art; it's worth spending
151another moment discussing this issue. When writing a changelog, you should
152bear in mind that a number of different people will be reading your words.
153These include subsystem maintainers and reviewers who need to decide
154whether the patch should be included, distributors and other maintainers
155trying to decide whether a patch should be backported to other kernels, bug
156hunters wondering whether the patch is responsible for a problem they are
157chasing, users who want to know how the kernel has changed, and more. A
158good changelog conveys the needed information to all of these people in the
159most direct and concise way possible.
160
161To that end, the summary line should describe the effects of and motivation
162for the change as well as possible given the one-line constraint. The
163detailed description can then amplify on those topics and provide any
164needed additional information. If the patch fixes a bug, cite the commit
165which introduced the bug if possible. If a problem is associated with
166specific log or compiler output, include that output to help others
167searching for a solution to the same problem. If the change is meant to
168support other changes coming in later patch, say so. If internal APIs are
169changed, detail those changes and how other developers should respond. In
170general, the more you can put yourself into the shoes of everybody who will
171be reading your changelog, the better that changelog (and the kernel as a
172whole) will be.
173
174Needless to say, the changelog should be the text used when committing the
175change to a revision control system. It will be followed by:
151 176
152 - The patch itself, in the unified ("-u") patch format. Using the "-p" 177 - The patch itself, in the unified ("-u") patch format. Using the "-p"
153 option to diff will associate function names with changes, making the 178 option to diff will associate function names with changes, making the
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 387b8a720f4a..d79aead9418b 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -188,7 +188,7 @@ For example, you can do something like the following.
188 188
189 void my_midlayer_destroy_something() 189 void my_midlayer_destroy_something()
190 { 190 {
191 devres_release_group(dev, my_midlayer_create_soemthing); 191 devres_release_group(dev, my_midlayer_create_something);
192 } 192 }
193 193
194 194
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index 8eda3fb66416..06f8f46692dc 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -23,8 +23,8 @@ first time, it was renamed to 'EDAC'.
23The bluesmoke project at sourceforge.net is now utilized as a 'staging area' 23The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
24for EDAC development, before it is sent upstream to kernel.org 24for EDAC development, before it is sent upstream to kernel.org
25 25
26At the bluesmoke/EDAC project site, is a series of quilt patches against 26At the bluesmoke/EDAC project site is a series of quilt patches against
27recent kernels, stored in a SVN respository. For easier downloading, there 27recent kernels, stored in a SVN repository. For easier downloading, there
28is also a tarball snapshot available. 28is also a tarball snapshot available.
29 29
30============================================================================ 30============================================================================
@@ -73,9 +73,9 @@ the vendor should tie the parity status bits to 0 if they do not intend
73to generate parity. Some vendors do not do this, and thus the parity bit 73to generate parity. Some vendors do not do this, and thus the parity bit
74can "float" giving false positives. 74can "float" giving false positives.
75 75
76In the kernel there is a pci device attribute located in sysfs that is 76In the kernel there is a PCI device attribute located in sysfs that is
77checked by the EDAC PCI scanning code. If that attribute is set, 77checked by the EDAC PCI scanning code. If that attribute is set,
78PCI parity/error scannining is skipped for that device. The attribute 78PCI parity/error scanning is skipped for that device. The attribute
79is: 79is:
80 80
81 broken_parity_status 81 broken_parity_status
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
index c87bfe5c630a..b994c3b10549 100644
--- a/Documentation/fb/sh7760fb.txt
+++ b/Documentation/fb/sh7760fb.txt
@@ -1,7 +1,7 @@
1SH7760/SH7763 integrated LCDC Framebuffer driver 1SH7760/SH7763 integrated LCDC Framebuffer driver
2================================================ 2================================================
3 3
40. Overwiew 40. Overview
5----------- 5-----------
6The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which 6The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
7supports (in theory) resolutions ranging from 1x1 to 1024x1024, 7supports (in theory) resolutions ranging from 1x1 to 1024x1024,
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index de491a3e2313..7129846a2785 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -437,3 +437,20 @@ Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate
437 driver but this caused driver conflicts. 437 driver but this caused driver conflicts.
438Who: Jean Delvare <khali@linux-fr.org> 438Who: Jean Delvare <khali@linux-fr.org>
439 Krzysztof Helt <krzysztof.h1@wp.pl> 439 Krzysztof Helt <krzysztof.h1@wp.pl>
440
441---------------------------
442
443What: CONFIG_RFKILL_INPUT
444When: 2.6.33
445Why: Should be implemented in userspace, policy daemon.
446Who: Johannes Berg <johannes@sipsolutions.net>
447
448----------------------------
449
450What: CONFIG_X86_OLD_MCE
451When: 2.6.32
452Why: Remove the old legacy 32bit machine check code. This has been
453 superseded by the newer machine check code from the 64bit port,
454 but the old version has been kept around for easier testing. Note this
455 doesn't impact the old P5 and WinChip machine check handlers.
456Who: Andi Kleen <andi@firstfloor.org>
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
index c6341745df37..8f78ded4b648 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -369,7 +369,7 @@ The call requires an initialized struct autofs_dev_ioctl. There are two
369possible variations. Both use the path field set to the path of the mount 369possible variations. Both use the path field set to the path of the mount
370point to check and the size field adjusted appropriately. One uses the 370point to check and the size field adjusted appropriately. One uses the
371ioctlfd field to identify a specific mount point to check while the other 371ioctlfd field to identify a specific mount point to check while the other
372variation uses the path and optionaly arg1 set to an autofs mount type. 372variation uses the path and optionally arg1 set to an autofs mount type.
373The call returns 1 if this is a mount point and sets arg1 to the device 373The call returns 1 if this is a mount point and sets arg1 to the device
374number of the mount and field arg2 to the relevant super block magic 374number of the mount and field arg2 to the relevant super block magic
375number (described below) or 0 if it isn't a mountpoint. In both cases 375number (described below) or 0 if it isn't a mountpoint. In both cases
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 4db125b3a5c6..2666b1ed5e9e 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -184,7 +184,7 @@ This has the following fields:
184 have index children. 184 have index children.
185 185
186 If this function is not supplied or if it returns NULL then the first 186 If this function is not supplied or if it returns NULL then the first
187 cache in the parent's list will be chosed, or failing that, the first 187 cache in the parent's list will be chosen, or failing that, the first
188 cache in the master list. 188 cache in the master list.
189 189
190 (4) A function to retrieve an object's key from the netfs [mandatory]. 190 (4) A function to retrieve an object's key from the netfs [mandatory].
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
new file mode 100644
index 000000000000..ed52af60c2d8
--- /dev/null
+++ b/Documentation/filesystems/debugfs.txt
@@ -0,0 +1,158 @@
1Copyright 2009 Jonathan Corbet <corbet@lwn.net>
2
3Debugfs exists as a simple way for kernel developers to make information
4available to user space. Unlike /proc, which is only meant for information
5about a process, or sysfs, which has strict one-value-per-file rules,
6debugfs has no rules at all. Developers can put any information they want
7there. The debugfs filesystem is also intended to not serve as a stable
8ABI to user space; in theory, there are no stability constraints placed on
9files exported there. The real world is not always so simple, though [1];
10even debugfs interfaces are best designed with the idea that they will need
11to be maintained forever.
12
13Debugfs is typically mounted with a command like:
14
15 mount -t debugfs none /sys/kernel/debug
16
17(Or an equivalent /etc/fstab line).
18
19Note that the debugfs API is exported GPL-only to modules.
20
21Code using debugfs should include <linux/debugfs.h>. Then, the first order
22of business will be to create at least one directory to hold a set of
23debugfs files:
24
25 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
26
27This call, if successful, will make a directory called name underneath the
28indicated parent directory. If parent is NULL, the directory will be
29created in the debugfs root. On success, the return value is a struct
30dentry pointer which can be used to create files in the directory (and to
31clean it up at the end). A NULL return value indicates that something went
32wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
33kernel has been built without debugfs support and none of the functions
34described below will work.
35
36The most general way to create a file within a debugfs directory is with:
37
38 struct dentry *debugfs_create_file(const char *name, mode_t mode,
39 struct dentry *parent, void *data,
40 const struct file_operations *fops);
41
42Here, name is the name of the file to create, mode describes the access
43permissions the file should have, parent indicates the directory which
44should hold the file, data will be stored in the i_private field of the
45resulting inode structure, and fops is a set of file operations which
46implement the file's behavior. At a minimum, the read() and/or write()
47operations should be provided; others can be included as needed. Again,
48the return value will be a dentry pointer to the created file, NULL for
49error, or ERR_PTR(-ENODEV) if debugfs support is missing.
50
51In a number of cases, the creation of a set of file operations is not
52actually necessary; the debugfs code provides a number of helper functions
53for simple situations. Files containing a single integer value can be
54created with any of:
55
56 struct dentry *debugfs_create_u8(const char *name, mode_t mode,
57 struct dentry *parent, u8 *value);
58 struct dentry *debugfs_create_u16(const char *name, mode_t mode,
59 struct dentry *parent, u16 *value);
60 struct dentry *debugfs_create_u32(const char *name, mode_t mode,
61 struct dentry *parent, u32 *value);
62 struct dentry *debugfs_create_u64(const char *name, mode_t mode,
63 struct dentry *parent, u64 *value);
64
65These files support both reading and writing the given value; if a specific
66file should not be written to, simply set the mode bits accordingly. The
67values in these files are in decimal; if hexadecimal is more appropriate,
68the following functions can be used instead:
69
70 struct dentry *debugfs_create_x8(const char *name, mode_t mode,
71 struct dentry *parent, u8 *value);
72 struct dentry *debugfs_create_x16(const char *name, mode_t mode,
73 struct dentry *parent, u16 *value);
74 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
75 struct dentry *parent, u32 *value);
76
77Note that there is no debugfs_create_x64().
78
79These functions are useful as long as the developer knows the size of the
80value to be exported. Some types can have different widths on different
81architectures, though, complicating the situation somewhat. There is a
82function meant to help out in one special case:
83
84 struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
85 struct dentry *parent,
86 size_t *value);
87
88As might be expected, this function will create a debugfs file to represent
89a variable of type size_t.
90
91Boolean values can be placed in debugfs with:
92
93 struct dentry *debugfs_create_bool(const char *name, mode_t mode,
94 struct dentry *parent, u32 *value);
95
96A read on the resulting file will yield either Y (for non-zero values) or
97N, followed by a newline. If written to, it will accept either upper- or
98lower-case values, or 1 or 0. Any other input will be silently ignored.
99
100Finally, a block of arbitrary binary data can be exported with:
101
102 struct debugfs_blob_wrapper {
103 void *data;
104 unsigned long size;
105 };
106
107 struct dentry *debugfs_create_blob(const char *name, mode_t mode,
108 struct dentry *parent,
109 struct debugfs_blob_wrapper *blob);
110
111A read of this file will return the data pointed to by the
112debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way
113to return several lines of (static) formatted text output. This function
114can be used to export binary information, but there does not appear to be
115any code which does so in the mainline. Note that all files created with
116debugfs_create_blob() are read-only.
117
118There are a couple of other directory-oriented helper functions:
119
120 struct dentry *debugfs_rename(struct dentry *old_dir,
121 struct dentry *old_dentry,
122 struct dentry *new_dir,
123 const char *new_name);
124
125 struct dentry *debugfs_create_symlink(const char *name,
126 struct dentry *parent,
127 const char *target);
128
129A call to debugfs_rename() will give a new name to an existing debugfs
130file, possibly in a different directory. The new_name must not exist prior
131to the call; the return value is old_dentry with updated information.
132Symbolic links can be created with debugfs_create_symlink().
133
134There is one important thing that all debugfs users must take into account:
135there is no automatic cleanup of any directories created in debugfs. If a
136module is unloaded without explicitly removing debugfs entries, the result
137will be a lot of stale pointers and no end of highly antisocial behavior.
138So all debugfs users - at least those which can be built as modules - must
139be prepared to remove all files and directories they create there. A file
140can be removed with:
141
142 void debugfs_remove(struct dentry *dentry);
143
144The dentry value can be NULL, in which case nothing will be removed.
145
146Once upon a time, debugfs users were required to remember the dentry
147pointer for every debugfs file they created so that all files could be
148cleaned up. We live in more civilized times now, though, and debugfs users
149can call:
150
151 void debugfs_remove_recursive(struct dentry *dentry);
152
153If this function is passed a pointer for the dentry corresponding to the
154top-level directory, the entire hierarchy below that directory will be
155removed.
156
157Notes:
158 [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 97882df04865..608fdba97b72 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -294,7 +294,7 @@ max_batch_time=usec Maximum amount of time ext4 should wait for
294 amount of time (on average) that it takes to 294 amount of time (on average) that it takes to
295 finish committing a transaction. Call this time 295 finish committing a transaction. Call this time
296 the "commit time". If the time that the 296 the "commit time". If the time that the
297 transactoin has been running is less than the 297 transaction has been running is less than the
298 commit time, ext4 will try sleeping for the 298 commit time, ext4 will try sleeping for the
299 commit time to see if other operations will join 299 commit time to see if other operations will join
300 the transaction. The commit time is capped by 300 the transaction. The commit time is capped by
@@ -328,7 +328,7 @@ noauto_da_alloc replacing existing files via patterns such as
328 journal commit, in the default data=ordered 328 journal commit, in the default data=ordered
329 mode, the data blocks of the new file are forced 329 mode, the data blocks of the new file are forced
330 to disk before the rename() operation is 330 to disk before the rename() operation is
331 commited. This provides roughly the same level 331 committed. This provides roughly the same level
332 of guarantees as ext3, and avoids the 332 of guarantees as ext3, and avoids the
333 "zero-length" problem that can happen when a 333 "zero-length" problem that can happen when a
334 system crashes before the delayed allocation 334 system crashes before the delayed allocation
@@ -358,7 +358,7 @@ written to the journal first, and then to its final location.
358In the event of a crash, the journal can be replayed, bringing both data and 358In the event of a crash, the journal can be replayed, bringing both data and
359metadata into a consistent state. This mode is the slowest except when data 359metadata into a consistent state. This mode is the slowest except when data
360needs to be read from and written to disk at the same time where it 360needs to be read from and written to disk at the same time where it
361outperforms all others modes. Curently ext4 does not have delayed 361outperforms all others modes. Currently ext4 does not have delayed
362allocation support if this data journalling mode is selected. 362allocation support if this data journalling mode is selected.
363 363
364References 364References
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
index 1e3defcfe50b..606233cd4618 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.txt
@@ -204,7 +204,7 @@ fiemap_check_flags() helper:
204 204
205int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); 205int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
206 206
207The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The 207The struct fieinfo should be passed in as received from ioctl_fiemap(). The
208set of fiemap flags which the fs understands should be passed via fs_flags. If 208set of fiemap flags which the fs understands should be passed via fs_flags. If
209fiemap_check_flags finds invalid user flags, it will place the bad values in 209fiemap_check_flags finds invalid user flags, it will place the bad values in
210fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from 210fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 4dae9a3840bf..0494f78d87e4 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -60,7 +60,7 @@ go_lock | Called for the first local holder of a lock
60go_unlock | Called on the final local unlock of a lock 60go_unlock | Called on the final local unlock of a lock
61go_dump | Called to print content of object for debugfs file, or on 61go_dump | Called to print content of object for debugfs file, or on
62 | error to dump glock to the log. 62 | error to dump glock to the log.
63go_type; | The type of the glock, LM_TYPE_..... 63go_type | The type of the glock, LM_TYPE_.....
64go_min_hold_time | The minimum hold time 64go_min_hold_time | The minimum hold time
65 65
66The minimum hold time for each lock is the time after a remote lock 66The minimum hold time for each lock is the time after a remote lock
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
index 593004b6bbab..5e3ab8f3beff 100644
--- a/Documentation/filesystems/gfs2.txt
+++ b/Documentation/filesystems/gfs2.txt
@@ -11,18 +11,15 @@ their I/O so file system consistency is maintained. One of the nifty
11features of GFS is perfect consistency -- changes made to the file system 11features of GFS is perfect consistency -- changes made to the file system
12on one machine show up immediately on all other machines in the cluster. 12on one machine show up immediately on all other machines in the cluster.
13 13
14GFS uses interchangable inter-node locking mechanisms. Different lock 14GFS uses interchangable inter-node locking mechanisms, the currently
15modules can plug into GFS and each file system selects the appropriate 15supported mechanisms are:
16lock module at mount time. Lock modules include:
17 16
18 lock_nolock -- allows gfs to be used as a local file system 17 lock_nolock -- allows gfs to be used as a local file system
19 18
20 lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking 19 lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
21 The dlm is found at linux/fs/dlm/ 20 The dlm is found at linux/fs/dlm/
22 21
23In addition to interfacing with an external locking manager, a gfs lock 22Lock_dlm depends on user space cluster management systems found
24module is responsible for interacting with external cluster management
25systems. Lock_dlm depends on user space cluster management systems found
26at the URL above. 23at the URL above.
27 24
28To use gfs as a local file system, no external clustering systems are 25To use gfs as a local file system, no external clustering systems are
@@ -31,13 +28,19 @@ needed, simply:
31 $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device 28 $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
32 $ mount -t gfs2 /dev/block_device /dir 29 $ mount -t gfs2 /dev/block_device /dir
33 30
34GFS2 is not on-disk compatible with previous versions of GFS. 31If you are using Fedora, you need to install the gfs2-utils package
32and, for lock_dlm, you will also need to install the cman package
33and write a cluster.conf as per the documentation.
34
35GFS2 is not on-disk compatible with previous versions of GFS, but it
36is pretty close.
35 37
36The following man pages can be found at the URL above: 38The following man pages can be found at the URL above:
37 gfs2_fsck to repair a filesystem 39 fsck.gfs2 to repair a filesystem
38 gfs2_grow to expand a filesystem online 40 gfs2_grow to expand a filesystem online
39 gfs2_jadd to add journals to a filesystem online 41 gfs2_jadd to add journals to a filesystem online
40 gfs2_tool to manipulate, examine and tune a filesystem 42 gfs2_tool to manipulate, examine and tune a filesystem
41 gfs2_quota to examine and change quota values in a filesystem 43 gfs2_quota to examine and change quota values in a filesystem
44 gfs2_convert to convert a gfs filesystem to gfs2 in-place
42 mount.gfs2 to help mount(8) mount a filesystem 45 mount.gfs2 to help mount(8) mount a filesystem
43 mkfs.gfs2 to make a filesystem 46 mkfs.gfs2 to make a filesystem
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
index 85eaeaddd27c..e386f7e4bcee 100644
--- a/Documentation/filesystems/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -100,7 +100,7 @@ Installation
100 $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs 100 $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
101 101
102 In this location, mount.nfs will be invoked automatically for NFS mounts 102 In this location, mount.nfs will be invoked automatically for NFS mounts
103 by the system mount commmand. 103 by the system mount command.
104 104
105 NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed 105 NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
106 on the NFS client machine. You do not need this specific version of 106 on the NFS client machine. You do not need this specific version of
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index 55c4300abfcb..01539f410676 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -39,9 +39,8 @@ Features which NILFS2 does not support yet:
39 - extended attributes 39 - extended attributes
40 - POSIX ACLs 40 - POSIX ACLs
41 - quotas 41 - quotas
42 - writable snapshots 42 - fsck
43 - remote backup (CDP) 43 - resize
44 - data integrity
45 - defragmentation 44 - defragmentation
46 45
47Mount options 46Mount options
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ce84cfc9eae0..cd8717a36271 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -366,7 +366,7 @@ just those considered 'most important'. The new vectors are:
366 RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are 366 RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
367 sent from one CPU to another per the needs of the OS. Typically, 367 sent from one CPU to another per the needs of the OS. Typically,
368 their statistics are used by kernel developers and interested users to 368 their statistics are used by kernel developers and interested users to
369 determine the occurance of interrupt of the given type. 369 determine the occurrence of interrupts of the given type.
370 370
371The above IRQ vectors are displayed only when relevent. For example, 371The above IRQ vectors are displayed only when relevent. For example,
372the threshold vector does not exist on x86_64 platforms. Others are 372the threshold vector does not exist on x86_64 platforms. Others are
@@ -551,7 +551,7 @@ Committed_AS: The amount of memory presently allocated on the system.
551 memory once that memory has been successfully allocated. 551 memory once that memory has been successfully allocated.
552VmallocTotal: total size of vmalloc memory area 552VmallocTotal: total size of vmalloc memory area
553 VmallocUsed: amount of vmalloc area which is used 553 VmallocUsed: amount of vmalloc area which is used
554VmallocChunk: largest contigious block of vmalloc area which is free 554VmallocChunk: largest contiguous block of vmalloc area which is free
555 555
556.............................................................................. 556..............................................................................
557 557
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 26e4b8bc53ee..85354b32d731 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -72,7 +72,7 @@ The 'rom' file is special in that it provides read-only access to the device's
72ROM file, if available. It's disabled by default, however, so applications 72ROM file, if available. It's disabled by default, however, so applications
73should write the string "1" to the file to enable it before attempting a read 73should write the string "1" to the file to enable it before attempting a read
74call, and disable it following the access by writing "0" to the file. Note 74call, and disable it following the access by writing "0" to the file. Note
75that the device must be enabled for a rom read to return data succesfully. 75that the device must be enabled for a rom read to return data successfully.
76In the event a driver is not bound to the device, it can be enabled using the 76In the event a driver is not bound to the device, it can be enabled using the
77'enable' file, documented above. 77'enable' file, documented above.
78 78
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 7d41c0da6344..b58b84b50fa2 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -124,10 +124,10 @@ sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as
124flush -- If set, the filesystem will try to flush to disk more 124flush -- If set, the filesystem will try to flush to disk more
125 early than normal. Not set by default. 125 early than normal. Not set by default.
126 126
127rodir -- FAT has the ATTR_RO (read-only) attribute. But on Windows, 127rodir -- FAT has the ATTR_RO (read-only) attribute. On Windows,
128 the ATTR_RO of the directory will be just ignored actually, 128 the ATTR_RO of the directory will just be ignored,
129 and is used by only applications as flag. E.g. it's setted 129 and is used only by applications as a flag (e.g. it's set
130 for the customized folder. 130 for the customized folder).
131 131
132 If you want to use ATTR_RO as read-only flag even for 132 If you want to use ATTR_RO as read-only flag even for
133 the directory, set this option. 133 the directory, set this option.
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 000000000000..9dc1ff4fd536
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
1Futex Requeue PI
2----------------
3
4Requeueing of tasks from a non-PI futex to a PI futex requires
5special handling in order to ensure the underlying rt_mutex is never
6left without an owner if it has waiters; doing so would break the PI
7boosting logic [see rt-mutex-desgin.txt] For the purposes of
8brevity, this action will be referred to as "requeue_pi" throughout
9this document. Priority inheritance is abbreviated throughout as
10"PI".
11
12Motivation
13----------
14
15Without requeue_pi, the glibc implementation of
16pthread_cond_broadcast() must resort to waking all the tasks waiting
17on a pthread_condvar and letting them try to sort out which task
18gets to run first in classic thundering-herd formation. An ideal
19implementation would wake the highest-priority waiter, and leave the
20rest to the natural wakeup inherent in unlocking the mutex
21associated with the condvar.
22
23Consider the simplified glibc calls:
24
25/* caller must lock mutex */
26pthread_cond_wait(cond, mutex)
27{
28 lock(cond->__data.__lock);
29 unlock(mutex);
30 do {
31 unlock(cond->__data.__lock);
32 futex_wait(cond->__data.__futex);
33 lock(cond->__data.__lock);
34 } while(...)
35 unlock(cond->__data.__lock);
36 lock(mutex);
37}
38
39pthread_cond_broadcast(cond)
40{
41 lock(cond->__data.__lock);
42 unlock(cond->__data.__lock);
43 futex_requeue(cond->data.__futex, cond->mutex);
44}
45
46Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
47has waiters. Note that pthread_cond_wait() attempts to lock the
48mutex only after it has returned to user space. This will leave the
49underlying rt_mutex with waiters, and no owner, breaking the
50previously mentioned PI-boosting algorithms.
51
52In order to support PI-aware pthread_condvar's, the kernel needs to
53be able to requeue tasks to PI futexes. This support implies that
54upon a successful futex_wait system call, the caller would return to
55user space already holding the PI futex. The glibc implementation
56would be modified as follows:
57
58
59/* caller must lock mutex */
60pthread_cond_wait_pi(cond, mutex)
61{
62 lock(cond->__data.__lock);
63 unlock(mutex);
64 do {
65 unlock(cond->__data.__lock);
66 futex_wait_requeue_pi(cond->__data.__futex);
67 lock(cond->__data.__lock);
68 } while(...)
69 unlock(cond->__data.__lock);
70 /* the kernel acquired the the mutex for us */
71}
72
73pthread_cond_broadcast_pi(cond)
74{
75 lock(cond->__data.__lock);
76 unlock(cond->__data.__lock);
77 futex_requeue_pi(cond->data.__futex, cond->mutex);
78}
79
80The actual glibc implementation will likely test for PI and make the
81necessary changes inside the existing calls rather than creating new
82calls for the PI cases. Similar changes are needed for
83pthread_cond_timedwait() and pthread_cond_signal().
84
85Implementation
86--------------
87
88In order to ensure the rt_mutex has an owner if it has waiters, it
89is necessary for both the requeue code, as well as the waiting code,
90to be able to acquire the rt_mutex before returning to user space.
91The requeue code cannot simply wake the waiter and leave it to
92acquire the rt_mutex as it would open a race window between the
93requeue call returning to user space and the waiter waking and
94starting to run. This is especially true in the uncontended case.
95
96The solution involves two new rt_mutex helper routines,
97rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
98allow the requeue code to acquire an uncontended rt_mutex on behalf
99of the waiter and to enqueue the waiter on a contended rt_mutex.
100Two new system calls provide the kernel<->user interface to
101requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
102
103FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
104and pthread_cond_timedwait()) to block on the initial futex and wait
105to be requeued to a PI-aware futex. The implementation is the
106result of a high-speed collision between futex_wait() and
107futex_lock_pi(), with some extra logic to check for the additional
108wake-up scenarios.
109
110FUTEX_REQUEUE_CMP_PI is called by the waker
111(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
112possibly wake the waiting tasks. Internally, this system call is
113still handled by futex_requeue (by passing requeue_pi=1). Before
114requeueing, futex_requeue() attempts to acquire the requeue target
115PI futex on behalf of the top waiter. If it can, this waiter is
116woken. futex_requeue() then proceeds to requeue the remaining
117nr_wake+nr_requeue tasks to the PI futex, calling
118rt_mutex_start_proxy_lock() prior to each requeue to prepare the
119task as a waiter on the underlying rt_mutex. It is possible that
120the lock can be acquired at this stage as well, if so, the next
121waiter is woken to finish the acquisition of the lock.
122
123FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
124their sum is all that really matters. futex_requeue() will wake or
125requeue up to nr_wake + nr_requeue tasks. It will wake only as many
126tasks as it can acquire the lock for, which in the majority of cases
127should be 0 as good programming practice dictates that the caller of
128either pthread_cond_broadcast() or pthread_cond_signal() acquire the
129mutex prior to making the call. FUTEX_REQUEUE_PI requires that
130nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
131signal.
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 145c25a170c7..e4b6985044a2 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -458,7 +458,7 @@ debugfs interface, since it provides control over GPIO direction and
458value instead of just showing a gpio state summary. Plus, it could be 458value instead of just showing a gpio state summary. Plus, it could be
459present on production systems without debugging support. 459present on production systems without debugging support.
460 460
461Given approprate hardware documentation for the system, userspace could 461Given appropriate hardware documentation for the system, userspace could
462know for example that GPIO #23 controls the write protect line used to 462know for example that GPIO #23 controls the write protect line used to
463protect boot loader segments in flash memory. System upgrade procedures 463protect boot loader segments in flash memory. System upgrade procedures
464may need to temporarily remove that protection, first importing a GPIO, 464may need to temporarily remove that protection, first importing a GPIO,
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index a8321267b5b6..bee4c30bc1e2 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -2,14 +2,18 @@ Kernel driver f71882fg
2====================== 2======================
3 3
4Supported chips: 4Supported chips:
5 * Fintek F71882FG and F71883FG 5 * Fintek F71858FG
6 Prefix: 'f71882fg' 6 Prefix: 'f71858fg'
7 Addresses scanned: none, address read from Super I/O config space 7 Addresses scanned: none, address read from Super I/O config space
8 Datasheet: Available from the Fintek website 8 Datasheet: Available from the Fintek website
9 * Fintek F71862FG and F71863FG 9 * Fintek F71862FG and F71863FG
10 Prefix: 'f71862fg' 10 Prefix: 'f71862fg'
11 Addresses scanned: none, address read from Super I/O config space 11 Addresses scanned: none, address read from Super I/O config space
12 Datasheet: Available from the Fintek website 12 Datasheet: Available from the Fintek website
13 * Fintek F71882FG and F71883FG
14 Prefix: 'f71882fg'
15 Addresses scanned: none, address read from Super I/O config space
16 Datasheet: Available from the Fintek website
13 * Fintek F8000 17 * Fintek F8000
14 Prefix: 'f8000' 18 Prefix: 'f8000'
15 Addresses scanned: none, address read from Super I/O config space 19 Addresses scanned: none, address read from Super I/O config space
@@ -66,13 +70,13 @@ printed when loading the driver.
66 70
67Three different fan control modes are supported; the mode number is written 71Three different fan control modes are supported; the mode number is written
68to the pwm#_enable file. Note that not all modes are supported on all 72to the pwm#_enable file. Note that not all modes are supported on all
69chips, and some modes may only be available in RPM / PWM mode on the F8000. 73chips, and some modes may only be available in RPM / PWM mode.
70Writing an unsupported mode will result in an invalid parameter error. 74Writing an unsupported mode will result in an invalid parameter error.
71 75
72* 1: Manual mode 76* 1: Manual mode
73 You ask for a specific PWM duty cycle / DC voltage or a specific % of 77 You ask for a specific PWM duty cycle / DC voltage or a specific % of
74 fan#_full_speed by writing to the pwm# file. This mode is only 78 fan#_full_speed by writing to the pwm# file. This mode is only
75 available on the F8000 if the fan channel is in RPM mode. 79 available on the F71858FG / F8000 if the fan channel is in RPM mode.
76 80
77* 2: Normal auto mode 81* 2: Normal auto mode
78 You can define a number of temperature/fan speed trip points, which % the 82 You can define a number of temperature/fan speed trip points, which % the
diff --git a/Documentation/hwmon/ibmaem b/Documentation/hwmon/ibmaem
index e98bdfea3467..1e0d59e000b4 100644
--- a/Documentation/hwmon/ibmaem
+++ b/Documentation/hwmon/ibmaem
@@ -7,7 +7,7 @@ henceforth as AEM.
7Supported systems: 7Supported systems:
8 * Any recent IBM System X server with AEM support. 8 * Any recent IBM System X server with AEM support.
9 This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2, 9 This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2,
10 x3950 M2, and certain HS2x/LS2x/QS2x blades. The IPMI host interface 10 x3950 M2, and certain HC10/HS2x/LS2x/QS2x blades. The IPMI host interface
11 driver ("ipmi-si") needs to be loaded for this driver to do anything. 11 driver ("ipmi-si") needs to be loaded for this driver to do anything.
12 Prefix: 'ibmaem' 12 Prefix: 'ibmaem'
13 Datasheet: Not available 13 Datasheet: Not available
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 004ee161721e..dcbd502c8792 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -70,6 +70,7 @@ are interpreted as 0! For more on how written strings are interpreted see the
70[0-*] denotes any positive number starting from 0 70[0-*] denotes any positive number starting from 0
71[1-*] denotes any positive number starting from 1 71[1-*] denotes any positive number starting from 1
72RO read only value 72RO read only value
73WO write only value
73RW read/write value 74RW read/write value
74 75
75Read/write values may be read-only for some chips, depending on the 76Read/write values may be read-only for some chips, depending on the
@@ -295,6 +296,24 @@ temp[1-*]_label Suggested temperature channel label.
295 user-space. 296 user-space.
296 RO 297 RO
297 298
299temp[1-*]_lowest
300 Historical minimum temperature
301 Unit: millidegree Celsius
302 RO
303
304temp[1-*]_highest
305 Historical maximum temperature
306 Unit: millidegree Celsius
307 RO
308
309temp[1-*]_reset_history
310 Reset temp_lowest and temp_highest
311 WO
312
313temp_reset_history
314 Reset temp_lowest and temp_highest for all sensors
315 WO
316
298Some chips measure temperature using external thermistors and an ADC, and 317Some chips measure temperature using external thermistors and an ADC, and
299report the temperature measurement as a voltage. Converting this voltage 318report the temperature measurement as a voltage. Converting this voltage
300back to a temperature (or the other way around for limits) requires 319back to a temperature (or the other way around for limits) requires
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401
new file mode 100644
index 000000000000..9fc447249212
--- /dev/null
+++ b/Documentation/hwmon/tmp401
@@ -0,0 +1,42 @@
1Kernel driver tmp401
2====================
3
4Supported chips:
5 * Texas Instruments TMP401
6 Prefix: 'tmp401'
7 Addresses scanned: I2C 0x4c
8 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html
9 * Texas Instruments TMP411
10 Prefix: 'tmp411'
11 Addresses scanned: I2C 0x4c
12 Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html
13
14Authors:
15 Hans de Goede <hdegoede@redhat.com>
16 Andre Prendel <andre.prendel@gmx.de>
17
18Description
19-----------
20
21This driver implements support for Texas Instruments TMP401 and
22TMP411 chips. These chips implements one remote and one local
23temperature sensor. Temperature is measured in degrees
24Celsius. Resolution of the remote sensor is 0.0625 degree. Local
25sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not
26supported by the driver so far, so using the default resolution of 0.5
27degree).
28
29The driver provides the common sysfs-interface for temperatures (see
30/Documentation/hwmon/sysfs-interface under Temperatures).
31
32The TMP411 chip is compatible with TMP401. It provides some additional
33features.
34
35* Minimum and Maximum temperature measured since power-on, chip-reset
36
37 Exported via sysfs attributes tempX_lowest and tempX_highest.
38
39* Reset of historical minimum/maximum temperature measurements
40
41 Exported via sysfs attribute temp_reset_history. Writing 1 to this
42 file triggers a reset.
diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf
index b6eb59384bb3..02b74899edaf 100644
--- a/Documentation/hwmon/w83627ehf
+++ b/Documentation/hwmon/w83627ehf
@@ -12,6 +12,10 @@ Supported chips:
12 Addresses scanned: ISA address retrieved from Super I/O registers 12 Addresses scanned: ISA address retrieved from Super I/O registers
13 Datasheet: 13 Datasheet:
14 http://www.nuvoton.com.tw/NR/rdonlyres/7885623D-A487-4CF9-A47F-30C5F73D6FE6/0/W83627DHG.pdf 14 http://www.nuvoton.com.tw/NR/rdonlyres/7885623D-A487-4CF9-A47F-30C5F73D6FE6/0/W83627DHG.pdf
15 * Winbond W83627DHG-P
16 Prefix: 'w83627dhg'
17 Addresses scanned: ISA address retrieved from Super I/O registers
18 Datasheet: not available
15 * Winbond W83667HG 19 * Winbond W83667HG
16 Prefix: 'w83667hg' 20 Prefix: 'w83667hg'
17 Addresses scanned: ISA address retrieved from Super I/O registers 21 Addresses scanned: ISA address retrieved from Super I/O registers
@@ -28,8 +32,8 @@ Description
28----------- 32-----------
29 33
30This driver implements support for the Winbond W83627EHF, W83627EHG, 34This driver implements support for the Winbond W83627EHF, W83627EHG,
31W83627DHG and W83667HG super I/O chips. We will refer to them collectively 35W83627DHG, W83627DHG-P and W83667HG super I/O chips. We will refer to them
32as Winbond chips. 36collectively as Winbond chips.
33 37
34The chips implement three temperature sensors, five fan rotation 38The chips implement three temperature sensors, five fan rotation
35speed sensors, ten analog voltage sensors (only nine for the 627DHG), one 39speed sensors, ten analog voltage sensors (only nine for the 627DHG), one
@@ -135,3 +139,6 @@ done in the driver for all register addresses.
135The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and 139The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and
136the ICH8 southbridge gets that data via PECI from the DHG, so that the 140the ICH8 southbridge gets that data via PECI from the DHG, so that the
137southbridge drives the fans. And the DHG supports SST, a one-wire serial bus. 141southbridge drives the fans. And the DHG supports SST, a one-wire serial bus.
142
143The DHG-P has an additional automatic fan speed control mode named Smart Fan
144(TM) III+. This mode is not yet supported by the driver.
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index cfcebb10d14e..c269aaa2f26a 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -20,6 +20,8 @@ platform_device with the base address and interrupt number. The
20dev.platform_data of the device should also point to a struct 20dev.platform_data of the device should also point to a struct
21ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the 21ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
22distance between registers and the input clock speed. 22distance between registers and the input clock speed.
23There is also a possibility to attach a list of i2c_board_info which
24the i2c-ocores driver will add to the bus upon creation.
23 25
24E.G. something like: 26E.G. something like:
25 27
@@ -36,9 +38,24 @@ static struct resource ocores_resources[] = {
36 }, 38 },
37}; 39};
38 40
41/* optional board info */
42struct i2c_board_info ocores_i2c_board_info[] = {
43 {
44 I2C_BOARD_INFO("tsc2003", 0x48),
45 .platform_data = &tsc2003_platform_data,
46 .irq = TSC_IRQ
47 },
48 {
49 I2C_BOARD_INFO("adv7180", 0x42 >> 1),
50 .irq = ADV_IRQ
51 }
52};
53
39static struct ocores_i2c_platform_data myi2c_data = { 54static struct ocores_i2c_platform_data myi2c_data = {
40 .regstep = 2, /* two bytes between registers */ 55 .regstep = 2, /* two bytes between registers */
41 .clock_khz = 50000, /* input clock of 50MHz */ 56 .clock_khz = 50000, /* input clock of 50MHz */
57 .devices = ocores_i2c_board_info, /* optional table of devices */
58 .num_devices = ARRAY_SIZE(ocores_i2c_board_info), /* table size */
42}; 59};
43 60
44static struct platform_device myi2c = { 61static struct platform_device myi2c = {
diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro
index 22efedf60c87..2e758b0e9456 100644
--- a/Documentation/i2c/busses/i2c-viapro
+++ b/Documentation/i2c/busses/i2c-viapro
@@ -19,6 +19,9 @@ Supported adapters:
19 * VIA Technologies, Inc. VX800/VX820 19 * VIA Technologies, Inc. VX800/VX820
20 Datasheet: available on http://linux.via.com.tw 20 Datasheet: available on http://linux.via.com.tw
21 21
22 * VIA Technologies, Inc. VX855/VX875
23 Datasheet: Availability unknown
24
22Authors: 25Authors:
23 Kyösti Mälkki <kmalkki@cc.hut.fi>, 26 Kyösti Mälkki <kmalkki@cc.hut.fi>,
24 Mark D. Studebaker <mdsxyz123@yahoo.com>, 27 Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -53,6 +56,7 @@ Your lspci -n listing must show one of these :
53 device 1106:3287 (VT8251) 56 device 1106:3287 (VT8251)
54 device 1106:8324 (CX700) 57 device 1106:8324 (CX700)
55 device 1106:8353 (VX800/VX820) 58 device 1106:8353 (VX800/VX820)
59 device 1106:8409 (VX855/VX875)
56 60
57If none of these show up, you should look in the BIOS for settings like 61If none of these show up, you should look in the BIOS for settings like
58enable ACPI / SMBus or even USB. 62enable ACPI / SMBus or even USB.
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 0c78f4b1d9d9..e77bebfa7b0d 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -216,6 +216,8 @@ Other kernel parameters for ide_core are:
216 216
217* "noflush=[interface_number.device_number]" to disable flush requests 217* "noflush=[interface_number.device_number]" to disable flush requests
218 218
219* "nohpa=[interface_number.device_number]" to disable Host Protected Area
220
219* "noprobe=[interface_number.device_number]" to skip probing 221* "noprobe=[interface_number.device_number]" to skip probing
220 222
221* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit 223* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX
index 5a2d69989a8c..f6010a536590 100644
--- a/Documentation/isdn/00-INDEX
+++ b/Documentation/isdn/00-INDEX
@@ -22,16 +22,11 @@ README.gigaset
22 - info on the drivers for Siemens Gigaset ISDN adapters. 22 - info on the drivers for Siemens Gigaset ISDN adapters.
23README.icn 23README.icn
24 - info on the ICN-ISDN-card and its driver. 24 - info on the ICN-ISDN-card and its driver.
25>>>>>>> 93af7aca44f0e82e67bda10a0fb73d383edcc8bd:Documentation/isdn/00-INDEX
25README.HiSax 26README.HiSax
26 - info on the HiSax driver which replaces the old teles. 27 - info on the HiSax driver which replaces the old teles.
27README.hfc-pci 28README.audio
28 - info on hfc-pci based cards. 29 - info for running audio over ISDN.
29README.pcbit
30 - info on the PCBIT-D ISDN adapter and driver.
31README.syncppp
32 - info on running Sync PPP over ISDN.
33syncPPP.FAQ
34 - frequently asked questions about running PPP over ISDN.
35README.avmb1 30README.avmb1
36 - info on driver for AVM-B1 ISDN card. 31 - info on driver for AVM-B1 ISDN card.
37README.act2000 32README.act2000
@@ -42,10 +37,28 @@ README.concap
42 - info on "CONCAP" encapsulation protocol interface used for X.25. 37 - info on "CONCAP" encapsulation protocol interface used for X.25.
43README.diversion 38README.diversion
44 - info on module for isdn diversion services. 39 - info on module for isdn diversion services.
40README.fax
41 - info for using Fax over ISDN.
42README.gigaset
43 - info on the drivers for Siemens Gigaset ISDN adapters
44README.hfc-pci
45 - info on hfc-pci based cards.
46README.hysdn
47 - info on driver for Hypercope active HYSDN cards
48README.icn
49 - info on the ICN-ISDN-card and its driver.
50README.mISDN
51 - info on the Modular ISDN subsystem (mISDN)
52README.pcbit
53 - info on the PCBIT-D ISDN adapter and driver.
45README.sc 54README.sc
46 - info on driver for Spellcaster cards. 55 - info on driver for Spellcaster cards.
56README.syncppp
57 - info on running Sync PPP over ISDN.
47README.x25 58README.x25
48 - info for running X.25 over ISDN. 59 - info for running X.25 over ISDN.
60syncPPP.FAQ
61 - frequently asked questions about running PPP over ISDN.
49README.hysdn 62README.hysdn
50 - info on driver for Hypercope active HYSDN cards 63 - info on driver for Hypercope active HYSDN cards
51README.mISDN 64README.mISDN
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 786d619b36e5..686e107923ec 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -45,7 +45,7 @@ From then on, Kernel CAPI may call the registered callback functions for the
45device. 45device.
46 46
47If the device becomes unusable for any reason (shutdown, disconnect ...), the 47If the device becomes unusable for any reason (shutdown, disconnect ...), the
48driver has to call capi_ctr_reseted(). This will prevent further calls to the 48driver has to call capi_ctr_down(). This will prevent further calls to the
49callback functions by Kernel CAPI. 49callback functions by Kernel CAPI.
50 50
51 51
@@ -114,20 +114,36 @@ char *driver_name
114int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata) 114int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata)
115 (optional) pointer to a callback function for sending firmware and 115 (optional) pointer to a callback function for sending firmware and
116 configuration data to the device 116 configuration data to the device
117 Return value: 0 on success, error code on error
118 Called in process context.
117 119
118void (*reset_ctr)(struct capi_ctr *ctrlr) 120void (*reset_ctr)(struct capi_ctr *ctrlr)
119 pointer to a callback function for performing a reset on the device, 121 (optional) pointer to a callback function for performing a reset on
120 releasing all registered applications 122 the device, releasing all registered applications
123 Called in process context.
121 124
122void (*register_appl)(struct capi_ctr *ctrlr, u16 applid, 125void (*register_appl)(struct capi_ctr *ctrlr, u16 applid,
123 capi_register_params *rparam) 126 capi_register_params *rparam)
124void (*release_appl)(struct capi_ctr *ctrlr, u16 applid) 127void (*release_appl)(struct capi_ctr *ctrlr, u16 applid)
125 pointers to callback functions for registration and deregistration of 128 pointers to callback functions for registration and deregistration of
126 applications with the device 129 applications with the device
130 Calls to these functions are serialized by Kernel CAPI so that only
131 one call to any of them is active at any time.
127 132
128u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb) 133u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
129 pointer to a callback function for sending a CAPI message to the 134 pointer to a callback function for sending a CAPI message to the
130 device 135 device
136 Return value: CAPI error code
137 If the method returns 0 (CAPI_NOERROR) the driver has taken ownership
138 of the skb and the caller may no longer access it. If it returns a
139 non-zero (error) value then ownership of the skb returns to the caller
140 who may reuse or free it.
141 The return value should only be used to signal problems with respect
142 to accepting or queueing the message. Errors occurring during the
143 actual processing of the message should be signaled with an
144 appropriate reply message.
145 Calls to this function are not serialized by Kernel CAPI, ie. it must
146 be prepared to be re-entered.
131 147
132char *(*procinfo)(struct capi_ctr *ctrlr) 148char *(*procinfo)(struct capi_ctr *ctrlr)
133 pointer to a callback function returning the entry for the device in 149 pointer to a callback function returning the entry for the device in
@@ -138,6 +154,8 @@ read_proc_t *ctr_read_proc
138 system entry, /proc/capi/controllers/<n>; will be called with a 154 system entry, /proc/capi/controllers/<n>; will be called with a
139 pointer to the device's capi_ctr structure as the last (data) argument 155 pointer to the device's capi_ctr structure as the last (data) argument
140 156
157Note: Callback functions are never called in interrupt context.
158
141- to be filled in before calling capi_ctr_ready(): 159- to be filled in before calling capi_ctr_ready():
142 160
143u8 manu[CAPI_MANUFACTURER_LEN] 161u8 manu[CAPI_MANUFACTURER_LEN]
@@ -153,6 +171,45 @@ u8 serial[CAPI_SERIAL_LEN]
153 value to return for CAPI_GET_SERIAL 171 value to return for CAPI_GET_SERIAL
154 172
155 173
1744.3 The _cmsg Structure
175
176(declared in <linux/isdn/capiutil.h>)
177
178The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
179accessible form. It contains members for all possible CAPI 2.0 parameters, of
180which only those appearing in the message type currently being processed are
181actually used. Unused members should be set to zero.
182
183Members are named after the CAPI 2.0 standard names of the parameters they
184represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
185types are:
186
187u8 for CAPI parameters of type 'byte'
188
189u16 for CAPI parameters of type 'word'
190
191u32 for CAPI parameters of type 'dword'
192
193_cstruct for CAPI parameters of type 'struct' not containing any
194 variably-sized (struct) subparameters (eg. 'Called Party Number')
195 The member is a pointer to a buffer containing the parameter in
196 CAPI encoding (length + content). It may also be NULL, which will
197 be taken to represent an empty (zero length) parameter.
198
199_cmstruct for CAPI parameters of type 'struct' containing 'struct'
200 subparameters ('Additional Info' and 'B Protocol')
201 The representation is a single byte containing one of the values:
202 CAPI_DEFAULT: the parameter is empty
203 CAPI_COMPOSE: the values of the subparameters are stored
204 individually in the corresponding _cmsg structure members
205
206Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
207messages between their transport encoding described in the CAPI 2.0 standard
208and their _cmsg structure representation. Note that capi_cmsg2message() does
209not know or check the size of its destination buffer. The caller must make
210sure it is big enough to accomodate the resulting CAPI message.
211
212
1565. Lower Layer Interface Functions 2135. Lower Layer Interface Functions
157 214
158(declared in <linux/isdn/capilli.h>) 215(declared in <linux/isdn/capilli.h>)
@@ -166,7 +223,7 @@ int detach_capi_ctr(struct capi_ctr *ctrlr)
166 register/unregister a device (controller) with Kernel CAPI 223 register/unregister a device (controller) with Kernel CAPI
167 224
168void capi_ctr_ready(struct capi_ctr *ctrlr) 225void capi_ctr_ready(struct capi_ctr *ctrlr)
169void capi_ctr_reseted(struct capi_ctr *ctrlr) 226void capi_ctr_down(struct capi_ctr *ctrlr)
170 signal controller ready/not ready 227 signal controller ready/not ready
171 228
172void capi_ctr_suspend_output(struct capi_ctr *ctrlr) 229void capi_ctr_suspend_output(struct capi_ctr *ctrlr)
@@ -211,3 +268,32 @@ CAPIMSG_CONTROL(m) CAPIMSG_SETCONTROL(m, contr) Controller/PLCI/NCCI
211 (u32) 268 (u32)
212CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16) 269CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16)
213 270
271
272Library functions for working with _cmsg structures
273(from <linux/isdn/capiutil.h>):
274
275unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg)
276 Assembles a CAPI 2.0 message from the parameters in *cmsg, storing the
277 result in *msg.
278
279unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg)
280 Disassembles the CAPI 2.0 message in *msg, storing the parameters in
281 *cmsg.
282
283unsigned capi_cmsg_header(_cmsg *cmsg, u16 ApplId, u8 Command, u8 Subcommand,
284 u16 Messagenumber, u32 Controller)
285 Fills the header part and address field of the _cmsg structure *cmsg
286 with the given values, zeroing the remainder of the structure so only
287 parameters with non-default values need to be changed before sending
288 the message.
289
290void capi_cmsg_answer(_cmsg *cmsg)
291 Sets the low bit of the Subcommand field in *cmsg, thereby converting
292 _REQ to _CONF and _IND to _RESP.
293
294char *capi_cmd2str(u8 Command, u8 Subcommand)
295 Returns the CAPI 2.0 message name corresponding to the given command
296 and subcommand values, as a static ASCII string. The return value may
297 be NULL if the command/subcommand is not one of those defined in the
298 CAPI 2.0 standard.
299
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 02c0e9341dd8..f9963103ae3d 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -149,10 +149,8 @@ GigaSet 307x Device Driver
149 configuration files and chat scripts in the gigaset-VERSION/ppp directory 149 configuration files and chat scripts in the gigaset-VERSION/ppp directory
150 in the driver packages from http://sourceforge.net/projects/gigaset307x/. 150 in the driver packages from http://sourceforge.net/projects/gigaset307x/.
151 Please note that the USB drivers are not able to change the state of the 151 Please note that the USB drivers are not able to change the state of the
152 control lines (the M105 driver can be configured to use some undocumented 152 control lines. This means you must use "Stupid Mode" if you are using
153 control requests, if you really need the control lines, though). This means 153 wvdial or you should use the nocrtscts option of pppd.
154 you must use "Stupid Mode" if you are using wvdial or you should use the
155 nocrtscts option of pppd.
156 You must also assure that the ppp_async module is loaded with the parameter 154 You must also assure that the ppp_async module is loaded with the parameter
157 flag_time=0. You can do this e.g. by adding a line like 155 flag_time=0. You can do this e.g. by adding a line like
158 156
@@ -190,20 +188,19 @@ GigaSet 307x Device Driver
190 You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode 188 You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode
191 setting (ttyGxy is ttyGU0 or ttyGB0). 189 setting (ttyGxy is ttyGU0 or ttyGB0).
192 190
1932.6. M105 Undocumented USB Requests 1912.6. Unregistered Wireless Devices (M101/M105)
194 ------------------------------ 192 -----------------------------------------
195 193 The main purpose of the ser_gigaset and usb_gigaset drivers is to allow
196 The Gigaset M105 USB data box understands a couple of useful, but 194 the M101 and M105 wireless devices to be used as ISDN devices for ISDN
197 undocumented USB commands. These requests are not used in normal 195 connections through a Gigaset base. Therefore they assume that the device
198 operation (for wireless access to the base), but are needed for access 196 is registered to a DECT base.
199 to the M105's own configuration mode (registration to the base, baudrate 197
200 and line format settings, device status queries) via the gigacontr 198 If the M101/M105 device is not registered to a base, initialization of
201 utility. Their use is controlled by the kernel configuration option 199 the device fails, and a corresponding error message is logged by the
202 "Support for undocumented USB requests" (CONFIG_GIGASET_UNDOCREQ). If you 200 driver. In that situation, a restricted set of functions is available
203 encounter error code -ENOTTY when trying to use some features of the 201 which includes, in particular, those necessary for registering the device
204 M105, try setting that option to "y" via 'make {x,menu}config' and 202 to a base or for switching it between Fixed Part and Portable Part
205 recompiling the driver. 203 modes.
206
207 204
2083. Troubleshooting 2053. Troubleshooting
209 --------------- 206 ---------------
@@ -234,11 +231,12 @@ GigaSet 307x Device Driver
234 Select Unimodem mode for all DECT data adapters. (see section 2.4.) 231 Select Unimodem mode for all DECT data adapters. (see section 2.4.)
235 232
236 Problem: 233 Problem:
237 You want to configure your USB DECT data adapter (M105) but gigacontr 234 Messages like this:
238 reports an error: "/dev/ttyGU0: Inappropriate ioctl for device". 235 usb_gigaset 3-2:1.0: Could not initialize the device.
236 appear in your syslog.
239 Solution: 237 Solution:
240 Recompile the usb_gigaset driver with the kernel configuration option 238 Check whether your M10x wireless device is correctly registered to the
241 CONFIG_GIGASET_UNDOCREQ set to 'y'. (see section 2.6.) 239 Gigaset base. (see section 2.6.)
242 240
2433.2. Telling the driver to provide more information 2413.2. Telling the driver to provide more information
244 ---------------------------------------------- 242 ----------------------------------------------
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
index 26a7c0a93193..849b5e56d06f 100644
--- a/Documentation/kbuild/kconfig.txt
+++ b/Documentation/kbuild/kconfig.txt
@@ -35,48 +35,26 @@ new .config files to see the differences:
35 35
36(Yes, we need something better here.) 36(Yes, we need something better here.)
37 37
38
39======================================================================
40menuconfig
41--------------------------------------------------
42
43SEARCHING for CONFIG symbols
44
45Searching in menuconfig:
46
47 The Search function searches for kernel configuration symbol
48 names, so you have to know something close to what you are
49 looking for.
50
51 Example:
52 /hotplug
53 This lists all config symbols that contain "hotplug",
54 e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
55
56 For search help, enter / followed TAB-TAB-TAB (to highlight
57 <Help>) and Enter. This will tell you that you can also use
58 regular expressions (regexes) in the search string, so if you
59 are not interested in MEMORY_HOTPLUG, you could try
60
61 /^hotplug
62
63
64______________________________________________________________________ 38______________________________________________________________________
65Color Themes for 'menuconfig' 39Environment variables for '*config'
66 40
67It is possible to select different color themes using the variable 41KCONFIG_CONFIG
68MENUCONFIG_COLOR. To select a theme use: 42--------------------------------------------------
43This environment variable can be used to specify a default kernel config
44file name to override the default name of ".config".
69 45
70 make MENUCONFIG_COLOR=<theme> menuconfig 46KCONFIG_OVERWRITECONFIG
47--------------------------------------------------
48If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
49break symlinks when .config is a symlink to somewhere else.
71 50
72Available themes are: 51KCONFIG_NOTIMESTAMP
73 mono => selects colors suitable for monochrome displays 52--------------------------------------------------
74 blackbg => selects a color scheme with black background 53If this environment variable exists and is non-null, the timestamp line
75 classic => theme with blue background. The classic look 54in generated .config files is omitted.
76 bluetitle => a LCD friendly version of classic. (default)
77 55
78______________________________________________________________________ 56______________________________________________________________________
79Environment variables in 'menuconfig' 57Environment variables for '{allyes/allmod/allno/rand}config'
80 58
81KCONFIG_ALLCONFIG 59KCONFIG_ALLCONFIG
82-------------------------------------------------- 60--------------------------------------------------
@@ -95,8 +73,7 @@ values.
95This enables you to create "miniature" config (miniconfig) or custom 73This enables you to create "miniature" config (miniconfig) or custom
96config files containing just the config symbols that you are interested 74config files containing just the config symbols that you are interested
97in. Then the kernel config system generates the full .config file, 75in. Then the kernel config system generates the full .config file,
98including dependencies of your miniconfig file, based on the miniconfig 76including symbols of your miniconfig file.
99file.
100 77
101This 'KCONFIG_ALLCONFIG' file is a config file which contains 78This 'KCONFIG_ALLCONFIG' file is a config file which contains
102(usually a subset of all) preset config symbols. These variable 79(usually a subset of all) preset config symbols. These variable
@@ -113,26 +90,14 @@ These examples will disable most options (allnoconfig) but enable or
113disable the options that are explicitly listed in the specified 90disable the options that are explicitly listed in the specified
114mini-config files. 91mini-config files.
115 92
93______________________________________________________________________
94Environment variables for 'silentoldconfig'
95
116KCONFIG_NOSILENTUPDATE 96KCONFIG_NOSILENTUPDATE
117-------------------------------------------------- 97--------------------------------------------------
118If this variable has a non-blank value, it prevents silent kernel 98If this variable has a non-blank value, it prevents silent kernel
119config udpates (requires explicit updates). 99config udpates (requires explicit updates).
120 100
121KCONFIG_CONFIG
122--------------------------------------------------
123This environment variable can be used to specify a default kernel config
124file name to override the default name of ".config".
125
126KCONFIG_OVERWRITECONFIG
127--------------------------------------------------
128If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
129break symlinks when .config is a symlink to somewhere else.
130
131KCONFIG_NOTIMESTAMP
132--------------------------------------------------
133If this environment variable exists and is non-null, the timestamp line
134in generated .config files is omitted.
135
136KCONFIG_AUTOCONFIG 101KCONFIG_AUTOCONFIG
137-------------------------------------------------- 102--------------------------------------------------
138This environment variable can be set to specify the path & name of the 103This environment variable can be set to specify the path & name of the
@@ -143,15 +108,54 @@ KCONFIG_AUTOHEADER
143This environment variable can be set to specify the path & name of the 108This environment variable can be set to specify the path & name of the
144"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". 109"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h".
145 110
111
112======================================================================
113menuconfig
114--------------------------------------------------
115
116SEARCHING for CONFIG symbols
117
118Searching in menuconfig:
119
120 The Search function searches for kernel configuration symbol
121 names, so you have to know something close to what you are
122 looking for.
123
124 Example:
125 /hotplug
126 This lists all config symbols that contain "hotplug",
127 e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG.
128
129 For search help, enter / followed TAB-TAB-TAB (to highlight
130 <Help>) and Enter. This will tell you that you can also use
131 regular expressions (regexes) in the search string, so if you
132 are not interested in MEMORY_HOTPLUG, you could try
133
134 /^hotplug
135
146______________________________________________________________________ 136______________________________________________________________________
147menuconfig User Interface Options 137User interface options for 'menuconfig'
148---------------------------------------------------------------------- 138
139MENUCONFIG_COLOR
140--------------------------------------------------
141It is possible to select different color themes using the variable
142MENUCONFIG_COLOR. To select a theme use:
143
144 make MENUCONFIG_COLOR=<theme> menuconfig
145
146Available themes are:
147 mono => selects colors suitable for monochrome displays
148 blackbg => selects a color scheme with black background
149 classic => theme with blue background. The classic look
150 bluetitle => a LCD friendly version of classic. (default)
151
149MENUCONFIG_MODE 152MENUCONFIG_MODE
150-------------------------------------------------- 153--------------------------------------------------
151This mode shows all sub-menus in one large tree. 154This mode shows all sub-menus in one large tree.
152 155
153Example: 156Example:
154 MENUCONFIG_MODE=single_menu make menuconfig 157 make MENUCONFIG_MODE=single_menu menuconfig
158
155 159
156====================================================================== 160======================================================================
157xconfig 161xconfig
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index b1096da953c8..0767cf69c69e 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -275,7 +275,7 @@ following files:
275 275
276 KERNELDIR := /lib/modules/`uname -r`/build 276 KERNELDIR := /lib/modules/`uname -r`/build
277 all:: 277 all::
278 $(MAKE) -C $KERNELDIR M=`pwd` $@ 278 $(MAKE) -C $(KERNELDIR) M=`pwd` $@
279 279
280 # Module specific targets 280 # Module specific targets
281 genbin: 281 genbin:
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 3f4bc840da8b..cab61d842259 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -108,7 +108,7 @@ There are two possible methods of using Kdump.
108 108
1092) Or use the system kernel binary itself as dump-capture kernel and there is 1092) Or use the system kernel binary itself as dump-capture kernel and there is
110 no need to build a separate dump-capture kernel. This is possible 110 no need to build a separate dump-capture kernel. This is possible
111 only with the architecutres which support a relocatable kernel. As 111 only with the architectures which support a relocatable kernel. As
112 of today, i386, x86_64, ppc64 and ia64 architectures support relocatable 112 of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
113 kernel. 113 kernel.
114 114
@@ -222,7 +222,7 @@ Dump-capture kernel config options (Arch Dependent, ia64)
222---------------------------------------------------------- 222----------------------------------------------------------
223 223
224- No specific options are required to create a dump-capture kernel 224- No specific options are required to create a dump-capture kernel
225 for ia64, other than those specified in the arch idependent section 225 for ia64, other than those specified in the arch independent section
226 above. This means that it is possible to use the system kernel 226 above. This means that it is possible to use the system kernel
227 as a dump-capture kernel if desired. 227 as a dump-capture kernel if desired.
228 228
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index fd5cac013037..ad3800630772 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,7 +56,6 @@ parameter is applicable:
56 ISAPNP ISA PnP code is enabled. 56 ISAPNP ISA PnP code is enabled.
57 ISDN Appropriate ISDN support is enabled. 57 ISDN Appropriate ISDN support is enabled.
58 JOY Appropriate joystick support is enabled. 58 JOY Appropriate joystick support is enabled.
59 KMEMTRACE kmemtrace is enabled.
60 LIBATA Libata driver is enabled 59 LIBATA Libata driver is enabled
61 LP Printer support is enabled. 60 LP Printer support is enabled.
62 LOOP Loopback device support is enabled. 61 LOOP Loopback device support is enabled.
@@ -329,11 +328,6 @@ and is between 256 and 4096 characters. It is defined in the file
329 flushed before they will be reused, which 328 flushed before they will be reused, which
330 is a lot of faster 329 is a lot of faster
331 330
332 amd_iommu_size= [HW,X86-64]
333 Define the size of the aperture for the AMD IOMMU
334 driver. Possible values are:
335 '32M', '64M' (default), '128M', '256M', '512M', '1G'
336
337 amijoy.map= [HW,JOY] Amiga joystick support 331 amijoy.map= [HW,JOY] Amiga joystick support
338 Map of devices attached to JOY0DAT and JOY1DAT 332 Map of devices attached to JOY0DAT and JOY1DAT
339 Format: <a>,<b> 333 Format: <a>,<b>
@@ -497,6 +491,13 @@ and is between 256 and 4096 characters. It is defined in the file
497 Also note the kernel might malfunction if you disable 491 Also note the kernel might malfunction if you disable
498 some critical bits. 492 some critical bits.
499 493
494 cmo_free_hint= [PPC] Format: { yes | no }
495 Specify whether pages are marked as being inactive
496 when they are freed. This is used in CMO environments
497 to determine OS memory pressure for page stealing by
498 a hypervisor.
499 Default: yes
500
500 code_bytes [X86] How many bytes of object code to print 501 code_bytes [X86] How many bytes of object code to print
501 in an oops report. 502 in an oops report.
502 Range: 0 - 8192 503 Range: 0 - 8192
@@ -646,6 +647,13 @@ and is between 256 and 4096 characters. It is defined in the file
646 DMA-API debugging code disables itself because the 647 DMA-API debugging code disables itself because the
647 architectural default is too low. 648 architectural default is too low.
648 649
650 dma_debug_driver=<driver_name>
651 With this option the DMA-API debugging driver
652 filter feature can be enabled at boot time. Just
653 pass the driver to filter for as the parameter.
654 The filter can be disabled or changed to another
655 driver later using sysfs.
656
649 dscc4.setup= [NET] 657 dscc4.setup= [NET]
650 658
651 dtc3181e= [HW,SCSI] 659 dtc3181e= [HW,SCSI]
@@ -752,12 +760,25 @@ and is between 256 and 4096 characters. It is defined in the file
752 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 760 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
753 761
754 ftrace=[tracer] 762 ftrace=[tracer]
755 [ftrace] will set and start the specified tracer 763 [FTRACE] will set and start the specified tracer
756 as early as possible in order to facilitate early 764 as early as possible in order to facilitate early
757 boot debugging. 765 boot debugging.
758 766
759 ftrace_dump_on_oops 767 ftrace_dump_on_oops
760 [ftrace] will dump the trace buffers on oops. 768 [FTRACE] will dump the trace buffers on oops.
769
770 ftrace_filter=[function-list]
771 [FTRACE] Limit the functions traced by the function
772 tracer at boot up. function-list is a comma separated
773 list of functions. This list can be changed at run
774 time by the set_ftrace_filter file in the debugfs
775 tracing directory.
776
777 ftrace_notrace=[function-list]
778 [FTRACE] Do not trace the functions specified in
779 function-list. This list can be changed at run time
780 by the set_ftrace_notrace file in the debugfs
781 tracing directory.
761 782
762 gamecon.map[2|3]= 783 gamecon.map[2|3]=
763 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 784 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -873,11 +894,8 @@ and is between 256 and 4096 characters. It is defined in the file
873 894
874 ide-core.nodma= [HW] (E)IDE subsystem 895 ide-core.nodma= [HW] (E)IDE subsystem
875 Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc 896 Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
876 .vlb_clock .pci_clock .noflush .noprobe .nowerr .cdrom 897 .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
877 .chs .ignore_cable are additional options 898 .cdrom .chs .ignore_cable are additional options
878 See Documentation/ide/ide.txt.
879
880 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
881 See Documentation/ide/ide.txt. 899 See Documentation/ide/ide.txt.
882 900
883 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem 901 ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
@@ -914,6 +932,12 @@ and is between 256 and 4096 characters. It is defined in the file
914 Formt: { "sha1" | "md5" } 932 Formt: { "sha1" | "md5" }
915 default: "sha1" 933 default: "sha1"
916 934
935 ima_tcb [IMA]
936 Load a policy which meets the needs of the Trusted
937 Computing Base. This means IMA will measure all
938 programs exec'd, files mmap'd for exec, and all files
939 opened for read by uid=0.
940
917 in2000= [HW,SCSI] 941 in2000= [HW,SCSI]
918 See header of drivers/scsi/in2000.c. 942 See header of drivers/scsi/in2000.c.
919 943
@@ -1054,24 +1078,19 @@ and is between 256 and 4096 characters. It is defined in the file
1054 use the HighMem zone if it exists, and the Normal 1078 use the HighMem zone if it exists, and the Normal
1055 zone if it does not. 1079 zone if it does not.
1056 1080
1057 kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
1058 Controls whether kmemtrace is enabled
1059 at boot-time.
1060
1061 kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
1062 subbufs kmemtrace's relay channel has. Set this
1063 higher than default (KMEMTRACE_N_SUBBUFS in code) if
1064 you experience buffer overruns.
1065
1066 kgdboc= [HW] kgdb over consoles. 1081 kgdboc= [HW] kgdb over consoles.
1067 Requires a tty driver that supports console polling. 1082 Requires a tty driver that supports console polling.
1068 (only serial suported for now) 1083 (only serial supported for now)
1069 Format: <serial_device>[,baud] 1084 Format: <serial_device>[,baud]
1070 1085
1071 kmac= [MIPS] korina ethernet MAC address. 1086 kmac= [MIPS] korina ethernet MAC address.
1072 Configure the RouterBoard 532 series on-chip 1087 Configure the RouterBoard 532 series on-chip
1073 Ethernet adapter MAC address. 1088 Ethernet adapter MAC address.
1074 1089
1090 kmemleak= [KNL] Boot-time kmemleak enable/disable
1091 Valid arguments: on, off
1092 Default: on
1093
1075 kstack=N [X86] Print N words from the kernel stack 1094 kstack=N [X86] Print N words from the kernel stack
1076 in oops dumps. 1095 in oops dumps.
1077 1096
@@ -1390,7 +1409,7 @@ and is between 256 and 4096 characters. It is defined in the file
1390 ('y', default) or cooked coordinates ('n') 1409 ('y', default) or cooked coordinates ('n')
1391 1410
1392 mtrr_chunk_size=nn[KMG] [X86] 1411 mtrr_chunk_size=nn[KMG] [X86]
1393 used for mtrr cleanup. It is largest continous chunk 1412 used for mtrr cleanup. It is largest continuous chunk
1394 that could hold holes aka. UC entries. 1413 that could hold holes aka. UC entries.
1395 1414
1396 mtrr_gran_size=nn[KMG] [X86] 1415 mtrr_gran_size=nn[KMG] [X86]
@@ -1575,6 +1594,9 @@ and is between 256 and 4096 characters. It is defined in the file
1575 noinitrd [RAM] Tells the kernel not to load any configured 1594 noinitrd [RAM] Tells the kernel not to load any configured
1576 initial RAM disk. 1595 initial RAM disk.
1577 1596
1597 nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
1598 remapping.
1599
1578 nointroute [IA-64] 1600 nointroute [IA-64]
1579 1601
1580 nojitter [IA64] Disables jitter checking for ITC timers. 1602 nojitter [IA64] Disables jitter checking for ITC timers.
@@ -1660,6 +1682,14 @@ and is between 256 and 4096 characters. It is defined in the file
1660 oprofile.timer= [HW] 1682 oprofile.timer= [HW]
1661 Use timer interrupt instead of performance counters 1683 Use timer interrupt instead of performance counters
1662 1684
1685 oprofile.cpu_type= Force an oprofile cpu type
1686 This might be useful if you have an older oprofile
1687 userland or if you want common events.
1688 Format: { archperfmon }
1689 archperfmon: [X86] Force use of architectural
1690 perfmon on Intel CPUs instead of the
1691 CPU specific event set.
1692
1663 osst= [HW,SCSI] SCSI Tape Driver 1693 osst= [HW,SCSI] SCSI Tape Driver
1664 Format: <buffer_size>,<write_threshold> 1694 Format: <buffer_size>,<write_threshold>
1665 See also Documentation/scsi/st.txt. 1695 See also Documentation/scsi/st.txt.
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 000000000000..0112da3b9ab8
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,142 @@
1Kernel Memory Leak Detector
2===========================
3
4Introduction
5------------
6
7Kmemleak provides a way of detecting possible kernel memory leaks in a
8way similar to a tracing garbage collector
9(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
10with the difference that the orphan objects are not freed but only
11reported via /sys/kernel/debug/kmemleak. A similar method is used by the
12Valgrind tool (memcheck --leak-check) to detect the memory leaks in
13user-space applications.
14
15Usage
16-----
17
18CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
19thread scans the memory every 10 minutes (by default) and prints any new
20unreferenced objects found. To trigger an intermediate scan and display
21all the possible memory leaks:
22
23 # mount -t debugfs nodev /sys/kernel/debug/
24 # cat /sys/kernel/debug/kmemleak
25
26Note that the orphan objects are listed in the order they were allocated
27and one object at the beginning of the list may cause other subsequent
28objects to be reported as orphan.
29
30Memory scanning parameters can be modified at run-time by writing to the
31/sys/kernel/debug/kmemleak file. The following parameters are supported:
32
33 off - disable kmemleak (irreversible)
34 stack=on - enable the task stacks scanning
35 stack=off - disable the tasks stacks scanning
36 scan=on - start the automatic memory scanning thread
37 scan=off - stop the automatic memory scanning thread
38 scan=<secs> - set the automatic memory scanning period in seconds (0
39 to disable it)
40
41Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
42the kernel command line.
43
44Basic Algorithm
45---------------
46
47The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
48friends are traced and the pointers, together with additional
49information like size and stack trace, are stored in a prio search tree.
50The corresponding freeing function calls are tracked and the pointers
51removed from the kmemleak data structures.
52
53An allocated block of memory is considered orphan if no pointer to its
54start address or to any location inside the block can be found by
55scanning the memory (including saved registers). This means that there
56might be no way for the kernel to pass the address of the allocated
57block to a freeing function and therefore the block is considered a
58memory leak.
59
60The scanning algorithm steps:
61
62 1. mark all objects as white (remaining white objects will later be
63 considered orphan)
64 2. scan the memory starting with the data section and stacks, checking
65 the values against the addresses stored in the prio search tree. If
66 a pointer to a white object is found, the object is added to the
67 gray list
68 3. scan the gray objects for matching addresses (some white objects
69 can become gray and added at the end of the gray list) until the
70 gray set is finished
71 4. the remaining white objects are considered orphan and reported via
72 /sys/kernel/debug/kmemleak
73
74Some allocated memory blocks have pointers stored in the kernel's
75internal data structures and they cannot be detected as orphans. To
76avoid this, kmemleak can also store the number of values pointing to an
77address inside the block address range that need to be found so that the
78block is not considered a leak. One example is __vmalloc().
79
80Kmemleak API
81------------
82
83See the include/linux/kmemleak.h header for the functions prototype.
84
85kmemleak_init - initialize kmemleak
86kmemleak_alloc - notify of a memory block allocation
87kmemleak_free - notify of a memory block freeing
88kmemleak_not_leak - mark an object as not a leak
89kmemleak_ignore - do not scan or report an object as leak
90kmemleak_scan_area - add scan areas inside a memory block
91kmemleak_no_scan - do not scan a memory block
92kmemleak_erase - erase an old value in a pointer variable
93kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
94kmemleak_free_recursive - as kmemleak_free but checks the recursiveness
95
96Dealing with false positives/negatives
97--------------------------------------
98
99The false negatives are real memory leaks (orphan objects) but not
100reported by kmemleak because values found during the memory scanning
101point to such objects. To reduce the number of false negatives, kmemleak
102provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and
103kmemleak_erase functions (see above). The task stacks also increase the
104amount of false negatives and their scanning is not enabled by default.
105
106The false positives are objects wrongly reported as being memory leaks
107(orphan). For objects known not to be leaks, kmemleak provides the
108kmemleak_not_leak function. The kmemleak_ignore could also be used if
109the memory block is known not to contain other pointers and it will no
110longer be scanned.
111
112Some of the reported leaks are only transient, especially on SMP
113systems, because of pointers temporarily stored in CPU registers or
114stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing
115the minimum age of an object to be reported as a memory leak.
116
117Limitations and Drawbacks
118-------------------------
119
120The main drawback is the reduced performance of memory allocation and
121freeing. To avoid other penalties, the memory scanning is only performed
122when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is
123intended for debugging purposes where the performance might not be the
124most important requirement.
125
126To keep the algorithm simple, kmemleak scans for values pointing to any
127address inside a block's address range. This may lead to an increased
128number of false negatives. However, it is likely that a real memory leak
129will eventually become visible.
130
131Another source of false negatives is the data stored in non-pointer
132values. In a future version, kmemleak could only scan the pointer
133members in the allocated structures. This feature would solve many of
134the false negative cases described above.
135
136The tool can report false positives. These are cases where an allocated
137block doesn't need to be freed (some cases in the init_call functions),
138the pointer is calculated by other methods than the usual container_of
139macro or the pointer is stored in a location not scanned by kmemleak.
140
141Page allocations and ioremap are not tracked. Only the ARM and x86
142architectures are currently supported.
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index b2e374586bd8..c79ab996dada 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -132,7 +132,7 @@ kobject_name():
132 const char *kobject_name(const struct kobject * kobj); 132 const char *kobject_name(const struct kobject * kobj);
133 133
134There is a helper function to both initialize and add the kobject to the 134There is a helper function to both initialize and add the kobject to the
135kernel at the same time, called supprisingly enough kobject_init_and_add(): 135kernel at the same time, called surprisingly enough kobject_init_and_add():
136 136
137 int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 137 int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
138 struct kobject *parent, const char *fmt, ...); 138 struct kobject *parent, const char *fmt, ...);
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt
index 5ee2a02b3b40..0768fcc3ba3e 100644
--- a/Documentation/laptops/acer-wmi.txt
+++ b/Documentation/laptops/acer-wmi.txt
@@ -40,7 +40,7 @@ NOTE: The Acer Aspire One is not supported hardware. It cannot work with
40acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been 40acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been
41blacklisted until that happens. 41blacklisted until that happens.
42 42
43Please see the website for the current list of known working hardare: 43Please see the website for the current list of known working hardware:
44 44
45http://code.google.com/p/aceracpi/wiki/SupportedHardware 45http://code.google.com/p/aceracpi/wiki/SupportedHardware
46 46
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
index 8b2bc1572d98..23ce7d350d1a 100644
--- a/Documentation/laptops/sony-laptop.txt
+++ b/Documentation/laptops/sony-laptop.txt
@@ -22,7 +22,7 @@ If your laptop model supports it, you will find sysfs files in the
22/sys/class/backlight/sony/ 22/sys/class/backlight/sony/
23directory. You will be able to query and set the current screen 23directory. You will be able to query and set the current screen
24brightness: 24brightness:
25 brightness get/set screen brightness (an iteger 25 brightness get/set screen brightness (an integer
26 between 0 and 7) 26 between 0 and 7)
27 actual_brightness reading from this file will query the HW 27 actual_brightness reading from this file will query the HW
28 to get real brightness value 28 to get real brightness value
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index e7e9a69069e1..78e354b42f67 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -506,7 +506,7 @@ generate input device EV_KEY events.
506In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW 506In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
507events for switches: 507events for switches:
508 508
509SW_RFKILL_ALL T60 and later hardare rfkill rocker switch 509SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
510SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A 510SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
511 511
512Non hot-key ACPI HKEY event map: 512Non hot-key ACPI HKEY event map:
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 1f4f9e888bd1..28c8cdfcafd8 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -1,6 +1,5 @@
1# This creates the demonstration utility "lguest" which runs a Linux guest. 1# This creates the demonstration utility "lguest" which runs a Linux guest.
2CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE 2CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -I../../include -I../../arch/x86/include -U_FORTIFY_SOURCE
3LDLIBS:=-lz
4 3
5all: lguest 4all: lguest
6 5
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index d36fcc0f2715..9ebcd6ef361b 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -16,6 +16,7 @@
16#include <sys/types.h> 16#include <sys/types.h>
17#include <sys/stat.h> 17#include <sys/stat.h>
18#include <sys/wait.h> 18#include <sys/wait.h>
19#include <sys/eventfd.h>
19#include <fcntl.h> 20#include <fcntl.h>
20#include <stdbool.h> 21#include <stdbool.h>
21#include <errno.h> 22#include <errno.h>
@@ -59,7 +60,6 @@ typedef uint8_t u8;
59/*:*/ 60/*:*/
60 61
61#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ 62#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
62#define NET_PEERNUM 1
63#define BRIDGE_PFX "bridge:" 63#define BRIDGE_PFX "bridge:"
64#ifndef SIOCBRADDIF 64#ifndef SIOCBRADDIF
65#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ 65#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
@@ -76,19 +76,12 @@ static bool verbose;
76 do { if (verbose) printf(args); } while(0) 76 do { if (verbose) printf(args); } while(0)
77/*:*/ 77/*:*/
78 78
79/* File descriptors for the Waker. */
80struct {
81 int pipe[2];
82 int lguest_fd;
83} waker_fds;
84
85/* The pointer to the start of guest memory. */ 79/* The pointer to the start of guest memory. */
86static void *guest_base; 80static void *guest_base;
87/* The maximum guest physical address allowed, and maximum possible. */ 81/* The maximum guest physical address allowed, and maximum possible. */
88static unsigned long guest_limit, guest_max; 82static unsigned long guest_limit, guest_max;
89/* The pipe for signal hander to write to. */ 83/* The /dev/lguest file descriptor. */
90static int timeoutpipe[2]; 84static int lguest_fd;
91static unsigned int timeout_usec = 500;
92 85
93/* a per-cpu variable indicating whose vcpu is currently running */ 86/* a per-cpu variable indicating whose vcpu is currently running */
94static unsigned int __thread cpu_id; 87static unsigned int __thread cpu_id;
@@ -96,11 +89,6 @@ static unsigned int __thread cpu_id;
96/* This is our list of devices. */ 89/* This is our list of devices. */
97struct device_list 90struct device_list
98{ 91{
99 /* Summary information about the devices in our list: ready to pass to
100 * select() to ask which need servicing.*/
101 fd_set infds;
102 int max_infd;
103
104 /* Counter to assign interrupt numbers. */ 92 /* Counter to assign interrupt numbers. */
105 unsigned int next_irq; 93 unsigned int next_irq;
106 94
@@ -126,22 +114,21 @@ struct device
126 /* The linked-list pointer. */ 114 /* The linked-list pointer. */
127 struct device *next; 115 struct device *next;
128 116
129 /* The this device's descriptor, as mapped into the Guest. */ 117 /* The device's descriptor, as mapped into the Guest. */
130 struct lguest_device_desc *desc; 118 struct lguest_device_desc *desc;
131 119
120 /* We can't trust desc values once Guest has booted: we use these. */
121 unsigned int feature_len;
122 unsigned int num_vq;
123
132 /* The name of this device, for --verbose. */ 124 /* The name of this device, for --verbose. */
133 const char *name; 125 const char *name;
134 126
135 /* If handle_input is set, it wants to be called when this file
136 * descriptor is ready. */
137 int fd;
138 bool (*handle_input)(int fd, struct device *me);
139
140 /* Any queues attached to this device */ 127 /* Any queues attached to this device */
141 struct virtqueue *vq; 128 struct virtqueue *vq;
142 129
143 /* Handle status being finalized (ie. feature bits stable). */ 130 /* Is it operational */
144 void (*ready)(struct device *me); 131 bool running;
145 132
146 /* Device-specific data. */ 133 /* Device-specific data. */
147 void *priv; 134 void *priv;
@@ -164,22 +151,28 @@ struct virtqueue
164 /* Last available index we saw. */ 151 /* Last available index we saw. */
165 u16 last_avail_idx; 152 u16 last_avail_idx;
166 153
167 /* The routine to call when the Guest pings us, or timeout. */ 154 /* How many are used since we sent last irq? */
168 void (*handle_output)(int fd, struct virtqueue *me, bool timeout); 155 unsigned int pending_used;
169 156
170 /* Outstanding buffers */ 157 /* Eventfd where Guest notifications arrive. */
171 unsigned int inflight; 158 int eventfd;
172 159
173 /* Is this blocked awaiting a timer? */ 160 /* Function for the thread which is servicing this virtqueue. */
174 bool blocked; 161 void (*service)(struct virtqueue *vq);
162 pid_t thread;
175}; 163};
176 164
177/* Remember the arguments to the program so we can "reboot" */ 165/* Remember the arguments to the program so we can "reboot" */
178static char **main_args; 166static char **main_args;
179 167
180/* Since guest is UP and we don't run at the same time, we don't need barriers. 168/* The original tty settings to restore on exit. */
181 * But I include them in the code in case others copy it. */ 169static struct termios orig_term;
182#define wmb() 170
171/* We have to be careful with barriers: our devices are all run in separate
172 * threads and so we need to make sure that changes visible to the Guest happen
173 * in precise order. */
174#define wmb() __asm__ __volatile__("" : : : "memory")
175#define mb() __asm__ __volatile__("" : : : "memory")
183 176
184/* Convert an iovec element to the given type. 177/* Convert an iovec element to the given type.
185 * 178 *
@@ -245,7 +238,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
245static u8 *get_feature_bits(struct device *dev) 238static u8 *get_feature_bits(struct device *dev)
246{ 239{
247 return (u8 *)(dev->desc + 1) 240 return (u8 *)(dev->desc + 1)
248 + dev->desc->num_vq * sizeof(struct lguest_vqconfig); 241 + dev->num_vq * sizeof(struct lguest_vqconfig);
249} 242}
250 243
251/*L:100 The Launcher code itself takes us out into userspace, that scary place 244/*L:100 The Launcher code itself takes us out into userspace, that scary place
@@ -505,99 +498,19 @@ static void concat(char *dst, char *args[])
505 * saw the arguments it expects when we looked at initialize() in lguest_user.c: 498 * saw the arguments it expects when we looked at initialize() in lguest_user.c:
506 * the base of Guest "physical" memory, the top physical page to allow and the 499 * the base of Guest "physical" memory, the top physical page to allow and the
507 * entry point for the Guest. */ 500 * entry point for the Guest. */
508static int tell_kernel(unsigned long start) 501static void tell_kernel(unsigned long start)
509{ 502{
510 unsigned long args[] = { LHREQ_INITIALIZE, 503 unsigned long args[] = { LHREQ_INITIALIZE,
511 (unsigned long)guest_base, 504 (unsigned long)guest_base,
512 guest_limit / getpagesize(), start }; 505 guest_limit / getpagesize(), start };
513 int fd;
514
515 verbose("Guest: %p - %p (%#lx)\n", 506 verbose("Guest: %p - %p (%#lx)\n",
516 guest_base, guest_base + guest_limit, guest_limit); 507 guest_base, guest_base + guest_limit, guest_limit);
517 fd = open_or_die("/dev/lguest", O_RDWR); 508 lguest_fd = open_or_die("/dev/lguest", O_RDWR);
518 if (write(fd, args, sizeof(args)) < 0) 509 if (write(lguest_fd, args, sizeof(args)) < 0)
519 err(1, "Writing to /dev/lguest"); 510 err(1, "Writing to /dev/lguest");
520
521 /* We return the /dev/lguest file descriptor to control this Guest */
522 return fd;
523} 511}
524/*:*/ 512/*:*/
525 513
526static void add_device_fd(int fd)
527{
528 FD_SET(fd, &devices.infds);
529 if (fd > devices.max_infd)
530 devices.max_infd = fd;
531}
532
533/*L:200
534 * The Waker.
535 *
536 * With console, block and network devices, we can have lots of input which we
537 * need to process. We could try to tell the kernel what file descriptors to
538 * watch, but handing a file descriptor mask through to the kernel is fairly
539 * icky.
540 *
541 * Instead, we clone off a thread which watches the file descriptors and writes
542 * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host
543 * stop running the Guest. This causes the Launcher to return from the
544 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
545 * the LHREQ_BREAK and wake us up again.
546 *
547 * This, of course, is merely a different *kind* of icky.
548 *
549 * Given my well-known antipathy to threads, I'd prefer to use processes. But
550 * it's easier to share Guest memory with threads, and trivial to share the
551 * devices.infds as the Launcher changes it.
552 */
553static int waker(void *unused)
554{
555 /* Close the write end of the pipe: only the Launcher has it open. */
556 close(waker_fds.pipe[1]);
557
558 for (;;) {
559 fd_set rfds = devices.infds;
560 unsigned long args[] = { LHREQ_BREAK, 1 };
561 unsigned int maxfd = devices.max_infd;
562
563 /* We also listen to the pipe from the Launcher. */
564 FD_SET(waker_fds.pipe[0], &rfds);
565 if (waker_fds.pipe[0] > maxfd)
566 maxfd = waker_fds.pipe[0];
567
568 /* Wait until input is ready from one of the devices. */
569 select(maxfd+1, &rfds, NULL, NULL, NULL);
570
571 /* Message from Launcher? */
572 if (FD_ISSET(waker_fds.pipe[0], &rfds)) {
573 char c;
574 /* If this fails, then assume Launcher has exited.
575 * Don't do anything on exit: we're just a thread! */
576 if (read(waker_fds.pipe[0], &c, 1) != 1)
577 _exit(0);
578 continue;
579 }
580
581 /* Send LHREQ_BREAK command to snap the Launcher out of it. */
582 pwrite(waker_fds.lguest_fd, args, sizeof(args), cpu_id);
583 }
584 return 0;
585}
586
587/* This routine just sets up a pipe to the Waker process. */
588static void setup_waker(int lguest_fd)
589{
590 /* This pipe is closed when Launcher dies, telling Waker. */
591 if (pipe(waker_fds.pipe) != 0)
592 err(1, "Creating pipe for Waker");
593
594 /* Waker also needs to know the lguest fd */
595 waker_fds.lguest_fd = lguest_fd;
596
597 if (clone(waker, malloc(4096) + 4096, CLONE_VM | SIGCHLD, NULL) == -1)
598 err(1, "Creating Waker");
599}
600
601/* 514/*
602 * Device Handling. 515 * Device Handling.
603 * 516 *
@@ -623,49 +536,90 @@ static void *_check_pointer(unsigned long addr, unsigned int size,
623/* Each buffer in the virtqueues is actually a chain of descriptors. This 536/* Each buffer in the virtqueues is actually a chain of descriptors. This
624 * function returns the next descriptor in the chain, or vq->vring.num if we're 537 * function returns the next descriptor in the chain, or vq->vring.num if we're
625 * at the end. */ 538 * at the end. */
626static unsigned next_desc(struct virtqueue *vq, unsigned int i) 539static unsigned next_desc(struct vring_desc *desc,
540 unsigned int i, unsigned int max)
627{ 541{
628 unsigned int next; 542 unsigned int next;
629 543
630 /* If this descriptor says it doesn't chain, we're done. */ 544 /* If this descriptor says it doesn't chain, we're done. */
631 if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT)) 545 if (!(desc[i].flags & VRING_DESC_F_NEXT))
632 return vq->vring.num; 546 return max;
633 547
634 /* Check they're not leading us off end of descriptors. */ 548 /* Check they're not leading us off end of descriptors. */
635 next = vq->vring.desc[i].next; 549 next = desc[i].next;
636 /* Make sure compiler knows to grab that: we don't want it changing! */ 550 /* Make sure compiler knows to grab that: we don't want it changing! */
637 wmb(); 551 wmb();
638 552
639 if (next >= vq->vring.num) 553 if (next >= max)
640 errx(1, "Desc next is %u", next); 554 errx(1, "Desc next is %u", next);
641 555
642 return next; 556 return next;
643} 557}
644 558
559/* This actually sends the interrupt for this virtqueue */
560static void trigger_irq(struct virtqueue *vq)
561{
562 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
563
564 /* Don't inform them if nothing used. */
565 if (!vq->pending_used)
566 return;
567 vq->pending_used = 0;
568
569 /* If they don't want an interrupt, don't send one, unless empty. */
570 if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
571 && lg_last_avail(vq) != vq->vring.avail->idx)
572 return;
573
574 /* Send the Guest an interrupt tell them we used something up. */
575 if (write(lguest_fd, buf, sizeof(buf)) != 0)
576 err(1, "Triggering irq %i", vq->config.irq);
577}
578
645/* This looks in the virtqueue and for the first available buffer, and converts 579/* This looks in the virtqueue and for the first available buffer, and converts
646 * it to an iovec for convenient access. Since descriptors consist of some 580 * it to an iovec for convenient access. Since descriptors consist of some
647 * number of output then some number of input descriptors, it's actually two 581 * number of output then some number of input descriptors, it's actually two
648 * iovecs, but we pack them into one and note how many of each there were. 582 * iovecs, but we pack them into one and note how many of each there were.
649 * 583 *
650 * This function returns the descriptor number found, or vq->vring.num (which 584 * This function returns the descriptor number found. */
651 * is never a valid descriptor number) if none was found. */ 585static unsigned wait_for_vq_desc(struct virtqueue *vq,
652static unsigned get_vq_desc(struct virtqueue *vq, 586 struct iovec iov[],
653 struct iovec iov[], 587 unsigned int *out_num, unsigned int *in_num)
654 unsigned int *out_num, unsigned int *in_num)
655{ 588{
656 unsigned int i, head; 589 unsigned int i, head, max;
657 u16 last_avail; 590 struct vring_desc *desc;
591 u16 last_avail = lg_last_avail(vq);
592
593 while (last_avail == vq->vring.avail->idx) {
594 u64 event;
595
596 /* OK, tell Guest about progress up to now. */
597 trigger_irq(vq);
598
599 /* OK, now we need to know about added descriptors. */
600 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
601
602 /* They could have slipped one in as we were doing that: make
603 * sure it's written, then check again. */
604 mb();
605 if (last_avail != vq->vring.avail->idx) {
606 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
607 break;
608 }
609
610 /* Nothing new? Wait for eventfd to tell us they refilled. */
611 if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
612 errx(1, "Event read failed?");
613
614 /* We don't need to be notified again. */
615 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
616 }
658 617
659 /* Check it isn't doing very strange things with descriptor numbers. */ 618 /* Check it isn't doing very strange things with descriptor numbers. */
660 last_avail = lg_last_avail(vq);
661 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) 619 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
662 errx(1, "Guest moved used index from %u to %u", 620 errx(1, "Guest moved used index from %u to %u",
663 last_avail, vq->vring.avail->idx); 621 last_avail, vq->vring.avail->idx);
664 622
665 /* If there's nothing new since last we looked, return invalid. */
666 if (vq->vring.avail->idx == last_avail)
667 return vq->vring.num;
668
669 /* Grab the next descriptor number they're advertising, and increment 623 /* Grab the next descriptor number they're advertising, and increment
670 * the index we've seen. */ 624 * the index we've seen. */
671 head = vq->vring.avail->ring[last_avail % vq->vring.num]; 625 head = vq->vring.avail->ring[last_avail % vq->vring.num];
@@ -678,15 +632,28 @@ static unsigned get_vq_desc(struct virtqueue *vq,
678 /* When we start there are none of either input nor output. */ 632 /* When we start there are none of either input nor output. */
679 *out_num = *in_num = 0; 633 *out_num = *in_num = 0;
680 634
635 max = vq->vring.num;
636 desc = vq->vring.desc;
681 i = head; 637 i = head;
638
639 /* If this is an indirect entry, then this buffer contains a descriptor
640 * table which we handle as if it's any normal descriptor chain. */
641 if (desc[i].flags & VRING_DESC_F_INDIRECT) {
642 if (desc[i].len % sizeof(struct vring_desc))
643 errx(1, "Invalid size for indirect buffer table");
644
645 max = desc[i].len / sizeof(struct vring_desc);
646 desc = check_pointer(desc[i].addr, desc[i].len);
647 i = 0;
648 }
649
682 do { 650 do {
683 /* Grab the first descriptor, and check it's OK. */ 651 /* Grab the first descriptor, and check it's OK. */
684 iov[*out_num + *in_num].iov_len = vq->vring.desc[i].len; 652 iov[*out_num + *in_num].iov_len = desc[i].len;
685 iov[*out_num + *in_num].iov_base 653 iov[*out_num + *in_num].iov_base
686 = check_pointer(vq->vring.desc[i].addr, 654 = check_pointer(desc[i].addr, desc[i].len);
687 vq->vring.desc[i].len);
688 /* If this is an input descriptor, increment that count. */ 655 /* If this is an input descriptor, increment that count. */
689 if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE) 656 if (desc[i].flags & VRING_DESC_F_WRITE)
690 (*in_num)++; 657 (*in_num)++;
691 else { 658 else {
692 /* If it's an output descriptor, they're all supposed 659 /* If it's an output descriptor, they're all supposed
@@ -697,11 +664,10 @@ static unsigned get_vq_desc(struct virtqueue *vq,
697 } 664 }
698 665
699 /* If we've got too many, that implies a descriptor loop. */ 666 /* If we've got too many, that implies a descriptor loop. */
700 if (*out_num + *in_num > vq->vring.num) 667 if (*out_num + *in_num > max)
701 errx(1, "Looped descriptor"); 668 errx(1, "Looped descriptor");
702 } while ((i = next_desc(vq, i)) != vq->vring.num); 669 } while ((i = next_desc(desc, i, max)) != max);
703 670
704 vq->inflight++;
705 return head; 671 return head;
706} 672}
707 673
@@ -719,44 +685,20 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
719 /* Make sure buffer is written before we update index. */ 685 /* Make sure buffer is written before we update index. */
720 wmb(); 686 wmb();
721 vq->vring.used->idx++; 687 vq->vring.used->idx++;
722 vq->inflight--; 688 vq->pending_used++;
723}
724
725/* This actually sends the interrupt for this virtqueue */
726static void trigger_irq(int fd, struct virtqueue *vq)
727{
728 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
729
730 /* If they don't want an interrupt, don't send one, unless empty. */
731 if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
732 && vq->inflight)
733 return;
734
735 /* Send the Guest an interrupt tell them we used something up. */
736 if (write(fd, buf, sizeof(buf)) != 0)
737 err(1, "Triggering irq %i", vq->config.irq);
738} 689}
739 690
740/* And here's the combo meal deal. Supersize me! */ 691/* And here's the combo meal deal. Supersize me! */
741static void add_used_and_trigger(int fd, struct virtqueue *vq, 692static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
742 unsigned int head, int len)
743{ 693{
744 add_used(vq, head, len); 694 add_used(vq, head, len);
745 trigger_irq(fd, vq); 695 trigger_irq(vq);
746} 696}
747 697
748/* 698/*
749 * The Console 699 * The Console
750 * 700 *
751 * Here is the input terminal setting we save, and the routine to restore them 701 * We associate some data with the console for our exit hack. */
752 * on exit so the user gets their terminal back. */
753static struct termios orig_term;
754static void restore_term(void)
755{
756 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
757}
758
759/* We associate some data with the console for our exit hack. */
760struct console_abort 702struct console_abort
761{ 703{
762 /* How many times have they hit ^C? */ 704 /* How many times have they hit ^C? */
@@ -766,276 +708,275 @@ struct console_abort
766}; 708};
767 709
768/* This is the routine which handles console input (ie. stdin). */ 710/* This is the routine which handles console input (ie. stdin). */
769static bool handle_console_input(int fd, struct device *dev) 711static void console_input(struct virtqueue *vq)
770{ 712{
771 int len; 713 int len;
772 unsigned int head, in_num, out_num; 714 unsigned int head, in_num, out_num;
773 struct iovec iov[dev->vq->vring.num]; 715 struct console_abort *abort = vq->dev->priv;
774 struct console_abort *abort = dev->priv; 716 struct iovec iov[vq->vring.num];
775
776 /* First we need a console buffer from the Guests's input virtqueue. */
777 head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
778
779 /* If they're not ready for input, stop listening to this file
780 * descriptor. We'll start again once they add an input buffer. */
781 if (head == dev->vq->vring.num)
782 return false;
783 717
718 /* Make sure there's a descriptor waiting. */
719 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
784 if (out_num) 720 if (out_num)
785 errx(1, "Output buffers in console in queue?"); 721 errx(1, "Output buffers in console in queue?");
786 722
787 /* This is why we convert to iovecs: the readv() call uses them, and so 723 /* Read it in. */
788 * it reads straight into the Guest's buffer. */ 724 len = readv(STDIN_FILENO, iov, in_num);
789 len = readv(dev->fd, iov, in_num);
790 if (len <= 0) { 725 if (len <= 0) {
791 /* This implies that the console is closed, is /dev/null, or 726 /* Ran out of input? */
792 * something went terribly wrong. */
793 warnx("Failed to get console input, ignoring console."); 727 warnx("Failed to get console input, ignoring console.");
794 /* Put the input terminal back. */ 728 /* For simplicity, dying threads kill the whole Launcher. So
795 restore_term(); 729 * just nap here. */
796 /* Remove callback from input vq, so it doesn't restart us. */ 730 for (;;)
797 dev->vq->handle_output = NULL; 731 pause();
798 /* Stop listening to this fd: don't call us again. */
799 return false;
800 } 732 }
801 733
802 /* Tell the Guest about the new input. */ 734 add_used_and_trigger(vq, head, len);
803 add_used_and_trigger(fd, dev->vq, head, len);
804 735
805 /* Three ^C within one second? Exit. 736 /* Three ^C within one second? Exit.
806 * 737 *
807 * This is such a hack, but works surprisingly well. Each ^C has to be 738 * This is such a hack, but works surprisingly well. Each ^C has to
808 * in a buffer by itself, so they can't be too fast. But we check that 739 * be in a buffer by itself, so they can't be too fast. But we check
809 * we get three within about a second, so they can't be too slow. */ 740 * that we get three within about a second, so they can't be too
810 if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { 741 * slow. */
811 if (!abort->count++) 742 if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
812 gettimeofday(&abort->start, NULL);
813 else if (abort->count == 3) {
814 struct timeval now;
815 gettimeofday(&now, NULL);
816 if (now.tv_sec <= abort->start.tv_sec+1) {
817 unsigned long args[] = { LHREQ_BREAK, 0 };
818 /* Close the fd so Waker will know it has to
819 * exit. */
820 close(waker_fds.pipe[1]);
821 /* Just in case Waker is blocked in BREAK, send
822 * unbreak now. */
823 write(fd, args, sizeof(args));
824 exit(2);
825 }
826 abort->count = 0;
827 }
828 } else
829 /* Any other key resets the abort counter. */
830 abort->count = 0; 743 abort->count = 0;
744 return;
745 }
831 746
832 /* Everything went OK! */ 747 abort->count++;
833 return true; 748 if (abort->count == 1)
749 gettimeofday(&abort->start, NULL);
750 else if (abort->count == 3) {
751 struct timeval now;
752 gettimeofday(&now, NULL);
753 /* Kill all Launcher processes with SIGINT, like normal ^C */
754 if (now.tv_sec <= abort->start.tv_sec+1)
755 kill(0, SIGINT);
756 abort->count = 0;
757 }
834} 758}
835 759
836/* Handling output for console is simple: we just get all the output buffers 760/* This is the routine which handles console output (ie. stdout). */
837 * and write them to stdout. */ 761static void console_output(struct virtqueue *vq)
838static void handle_console_output(int fd, struct virtqueue *vq, bool timeout)
839{ 762{
840 unsigned int head, out, in; 763 unsigned int head, out, in;
841 int len;
842 struct iovec iov[vq->vring.num]; 764 struct iovec iov[vq->vring.num];
843 765
844 /* Keep getting output buffers from the Guest until we run out. */ 766 head = wait_for_vq_desc(vq, iov, &out, &in);
845 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) { 767 if (in)
846 if (in) 768 errx(1, "Input buffers in console output queue?");
847 errx(1, "Input buffers in output queue?"); 769 while (!iov_empty(iov, out)) {
848 len = writev(STDOUT_FILENO, iov, out); 770 int len = writev(STDOUT_FILENO, iov, out);
849 add_used_and_trigger(fd, vq, head, len); 771 if (len <= 0)
772 err(1, "Write to stdout gave %i", len);
773 iov_consume(iov, out, len);
850 } 774 }
851} 775 add_used(vq, head, 0);
852
853/* This is called when we no longer want to hear about Guest changes to a
854 * virtqueue. This is more efficient in high-traffic cases, but it means we
855 * have to set a timer to check if any more changes have occurred. */
856static void block_vq(struct virtqueue *vq)
857{
858 struct itimerval itm;
859
860 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
861 vq->blocked = true;
862
863 itm.it_interval.tv_sec = 0;
864 itm.it_interval.tv_usec = 0;
865 itm.it_value.tv_sec = 0;
866 itm.it_value.tv_usec = timeout_usec;
867
868 setitimer(ITIMER_REAL, &itm, NULL);
869} 776}
870 777
871/* 778/*
872 * The Network 779 * The Network
873 * 780 *
874 * Handling output for network is also simple: we get all the output buffers 781 * Handling output for network is also simple: we get all the output buffers
875 * and write them (ignoring the first element) to this device's file descriptor 782 * and write them to /dev/net/tun.
876 * (/dev/net/tun).
877 */ 783 */
878static void handle_net_output(int fd, struct virtqueue *vq, bool timeout) 784struct net_info {
785 int tunfd;
786};
787
788static void net_output(struct virtqueue *vq)
879{ 789{
880 unsigned int head, out, in, num = 0; 790 struct net_info *net_info = vq->dev->priv;
881 int len; 791 unsigned int head, out, in;
882 struct iovec iov[vq->vring.num]; 792 struct iovec iov[vq->vring.num];
883 static int last_timeout_num;
884
885 /* Keep getting output buffers from the Guest until we run out. */
886 while ((head = get_vq_desc(vq, iov, &out, &in)) != vq->vring.num) {
887 if (in)
888 errx(1, "Input buffers in output queue?");
889 len = writev(vq->dev->fd, iov, out);
890 if (len < 0)
891 err(1, "Writing network packet to tun");
892 add_used_and_trigger(fd, vq, head, len);
893 num++;
894 }
895 793
896 /* Block further kicks and set up a timer if we saw anything. */ 794 head = wait_for_vq_desc(vq, iov, &out, &in);
897 if (!timeout && num) 795 if (in)
898 block_vq(vq); 796 errx(1, "Input buffers in net output queue?");
899 797 if (writev(net_info->tunfd, iov, out) < 0)
900 /* We never quite know how long should we wait before we check the 798 errx(1, "Write to tun failed?");
901 * queue again for more packets. We start at 500 microseconds, and if 799 add_used(vq, head, 0);
902 * we get fewer packets than last time, we assume we made the timeout 800}
903 * too small and increase it by 10 microseconds. Otherwise, we drop it 801
904 * by one microsecond every time. It seems to work well enough. */ 802/* Will reading from this file descriptor block? */
905 if (timeout) { 803static bool will_block(int fd)
906 if (num < last_timeout_num) 804{
907 timeout_usec += 10; 805 fd_set fdset;
908 else if (timeout_usec > 1) 806 struct timeval zero = { 0, 0 };
909 timeout_usec--; 807 FD_ZERO(&fdset);
910 last_timeout_num = num; 808 FD_SET(fd, &fdset);
911 } 809 return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
912} 810}
913 811
914/* This is where we handle a packet coming in from the tun device to our 812/* This is where we handle packets coming in from the tun device to our
915 * Guest. */ 813 * Guest. */
916static bool handle_tun_input(int fd, struct device *dev) 814static void net_input(struct virtqueue *vq)
917{ 815{
918 unsigned int head, in_num, out_num;
919 int len; 816 int len;
920 struct iovec iov[dev->vq->vring.num]; 817 unsigned int head, out, in;
921 818 struct iovec iov[vq->vring.num];
922 /* First we need a network buffer from the Guests's recv virtqueue. */ 819 struct net_info *net_info = vq->dev->priv;
923 head = get_vq_desc(dev->vq, iov, &out_num, &in_num);
924 if (head == dev->vq->vring.num) {
925 /* Now, it's expected that if we try to send a packet too
926 * early, the Guest won't be ready yet. Wait until the device
927 * status says it's ready. */
928 /* FIXME: Actually want DRIVER_ACTIVE here. */
929
930 /* Now tell it we want to know if new things appear. */
931 dev->vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
932 wmb();
933
934 /* We'll turn this back on if input buffers are registered. */
935 return false;
936 } else if (out_num)
937 errx(1, "Output buffers in network recv queue?");
938
939 /* Read the packet from the device directly into the Guest's buffer. */
940 len = readv(dev->fd, iov, in_num);
941 if (len <= 0)
942 err(1, "reading network");
943 820
944 /* Tell the Guest about the new packet. */ 821 head = wait_for_vq_desc(vq, iov, &out, &in);
945 add_used_and_trigger(fd, dev->vq, head, len); 822 if (out)
823 errx(1, "Output buffers in net input queue?");
946 824
947 verbose("tun input packet len %i [%02x %02x] (%s)\n", len, 825 /* Deliver interrupt now, since we're about to sleep. */
948 ((u8 *)iov[1].iov_base)[0], ((u8 *)iov[1].iov_base)[1], 826 if (vq->pending_used && will_block(net_info->tunfd))
949 head != dev->vq->vring.num ? "sent" : "discarded"); 827 trigger_irq(vq);
950 828
951 /* All good. */ 829 len = readv(net_info->tunfd, iov, in);
952 return true; 830 if (len <= 0)
831 err(1, "Failed to read from tun.");
832 add_used(vq, head, len);
953} 833}
954 834
955/*L:215 This is the callback attached to the network and console input 835/* This is the helper to create threads. */
956 * virtqueues: it ensures we try again, in case we stopped console or net 836static int do_thread(void *_vq)
957 * delivery because Guest didn't have any buffers. */
958static void enable_fd(int fd, struct virtqueue *vq, bool timeout)
959{ 837{
960 add_device_fd(vq->dev->fd); 838 struct virtqueue *vq = _vq;
961 /* Snap the Waker out of its select loop. */ 839
962 write(waker_fds.pipe[1], "", 1); 840 for (;;)
841 vq->service(vq);
842 return 0;
963} 843}
964 844
965static void net_enable_fd(int fd, struct virtqueue *vq, bool timeout) 845/* When a child dies, we kill our entire process group with SIGTERM. This
846 * also has the side effect that the shell restores the console for us! */
847static void kill_launcher(int signal)
966{ 848{
967 /* We don't need to know again when Guest refills receive buffer. */ 849 kill(0, SIGTERM);
968 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
969 enable_fd(fd, vq, timeout);
970} 850}
971 851
972/* When the Guest tells us they updated the status field, we handle it. */ 852static void reset_device(struct device *dev)
973static void update_device_status(struct device *dev)
974{ 853{
975 struct virtqueue *vq; 854 struct virtqueue *vq;
976 855
977 /* This is a reset. */ 856 verbose("Resetting device %s\n", dev->name);
978 if (dev->desc->status == 0) {
979 verbose("Resetting device %s\n", dev->name);
980 857
981 /* Clear any features they've acked. */ 858 /* Clear any features they've acked. */
982 memset(get_feature_bits(dev) + dev->desc->feature_len, 0, 859 memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
983 dev->desc->feature_len);
984 860
985 /* Zero out the virtqueues. */ 861 /* We're going to be explicitly killing threads, so ignore them. */
986 for (vq = dev->vq; vq; vq = vq->next) { 862 signal(SIGCHLD, SIG_IGN);
987 memset(vq->vring.desc, 0, 863
988 vring_size(vq->config.num, LGUEST_VRING_ALIGN)); 864 /* Zero out the virtqueues, get rid of their threads */
989 lg_last_avail(vq) = 0; 865 for (vq = dev->vq; vq; vq = vq->next) {
866 if (vq->thread != (pid_t)-1) {
867 kill(vq->thread, SIGTERM);
868 waitpid(vq->thread, NULL, 0);
869 vq->thread = (pid_t)-1;
990 } 870 }
991 } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { 871 memset(vq->vring.desc, 0,
872 vring_size(vq->config.num, LGUEST_VRING_ALIGN));
873 lg_last_avail(vq) = 0;
874 }
875 dev->running = false;
876
877 /* Now we care if threads die. */
878 signal(SIGCHLD, (void *)kill_launcher);
879}
880
881static void create_thread(struct virtqueue *vq)
882{
883 /* Create stack for thread and run it. Since stack grows
884 * upwards, we point the stack pointer to the end of this
885 * region. */
886 char *stack = malloc(32768);
887 unsigned long args[] = { LHREQ_EVENTFD,
888 vq->config.pfn*getpagesize(), 0 };
889
890 /* Create a zero-initialized eventfd. */
891 vq->eventfd = eventfd(0, 0);
892 if (vq->eventfd < 0)
893 err(1, "Creating eventfd");
894 args[2] = vq->eventfd;
895
896 /* Attach an eventfd to this virtqueue: it will go off
897 * when the Guest does an LHCALL_NOTIFY for this vq. */
898 if (write(lguest_fd, &args, sizeof(args)) != 0)
899 err(1, "Attaching eventfd");
900
901 /* CLONE_VM: because it has to access the Guest memory, and
902 * SIGCHLD so we get a signal if it dies. */
903 vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
904 if (vq->thread == (pid_t)-1)
905 err(1, "Creating clone");
906 /* We close our local copy, now the child has it. */
907 close(vq->eventfd);
908}
909
910static void start_device(struct device *dev)
911{
912 unsigned int i;
913 struct virtqueue *vq;
914
915 verbose("Device %s OK: offered", dev->name);
916 for (i = 0; i < dev->feature_len; i++)
917 verbose(" %02x", get_feature_bits(dev)[i]);
918 verbose(", accepted");
919 for (i = 0; i < dev->feature_len; i++)
920 verbose(" %02x", get_feature_bits(dev)
921 [dev->feature_len+i]);
922
923 for (vq = dev->vq; vq; vq = vq->next) {
924 if (vq->service)
925 create_thread(vq);
926 }
927 dev->running = true;
928}
929
930static void cleanup_devices(void)
931{
932 struct device *dev;
933
934 for (dev = devices.dev; dev; dev = dev->next)
935 reset_device(dev);
936
937 /* If we saved off the original terminal settings, restore them now. */
938 if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
939 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
940}
941
942/* When the Guest tells us they updated the status field, we handle it. */
943static void update_device_status(struct device *dev)
944{
945 /* A zero status is a reset, otherwise it's a set of flags. */
946 if (dev->desc->status == 0)
947 reset_device(dev);
948 else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
992 warnx("Device %s configuration FAILED", dev->name); 949 warnx("Device %s configuration FAILED", dev->name);
950 if (dev->running)
951 reset_device(dev);
993 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { 952 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
994 unsigned int i; 953 if (!dev->running)
995 954 start_device(dev);
996 verbose("Device %s OK: offered", dev->name);
997 for (i = 0; i < dev->desc->feature_len; i++)
998 verbose(" %02x", get_feature_bits(dev)[i]);
999 verbose(", accepted");
1000 for (i = 0; i < dev->desc->feature_len; i++)
1001 verbose(" %02x", get_feature_bits(dev)
1002 [dev->desc->feature_len+i]);
1003
1004 if (dev->ready)
1005 dev->ready(dev);
1006 } 955 }
1007} 956}
1008 957
1009/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ 958/* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
1010static void handle_output(int fd, unsigned long addr) 959static void handle_output(unsigned long addr)
1011{ 960{
1012 struct device *i; 961 struct device *i;
1013 struct virtqueue *vq;
1014 962
1015 /* Check each device and virtqueue. */ 963 /* Check each device. */
1016 for (i = devices.dev; i; i = i->next) { 964 for (i = devices.dev; i; i = i->next) {
965 struct virtqueue *vq;
966
1017 /* Notifications to device descriptors update device status. */ 967 /* Notifications to device descriptors update device status. */
1018 if (from_guest_phys(addr) == i->desc) { 968 if (from_guest_phys(addr) == i->desc) {
1019 update_device_status(i); 969 update_device_status(i);
1020 return; 970 return;
1021 } 971 }
1022 972
1023 /* Notifications to virtqueues mean output has occurred. */ 973 /* Devices *can* be used before status is set to DRIVER_OK. */
1024 for (vq = i->vq; vq; vq = vq->next) { 974 for (vq = i->vq; vq; vq = vq->next) {
1025 if (vq->config.pfn != addr/getpagesize()) 975 if (addr != vq->config.pfn*getpagesize())
1026 continue; 976 continue;
1027 977 if (i->running)
1028 /* Guest should acknowledge (and set features!) before 978 errx(1, "Notification on running %s", i->name);
1029 * using the device. */ 979 start_device(i);
1030 if (i->desc->status == 0) {
1031 warnx("%s gave early output", i->name);
1032 return;
1033 }
1034
1035 if (strcmp(vq->dev->name, "console") != 0)
1036 verbose("Output to %s\n", vq->dev->name);
1037 if (vq->handle_output)
1038 vq->handle_output(fd, vq, false);
1039 return; 980 return;
1040 } 981 }
1041 } 982 }
@@ -1049,71 +990,6 @@ static void handle_output(int fd, unsigned long addr)
1049 strnlen(from_guest_phys(addr), guest_limit - addr)); 990 strnlen(from_guest_phys(addr), guest_limit - addr));
1050} 991}
1051 992
1052static void handle_timeout(int fd)
1053{
1054 char buf[32];
1055 struct device *i;
1056 struct virtqueue *vq;
1057
1058 /* Clear the pipe */
1059 read(timeoutpipe[0], buf, sizeof(buf));
1060
1061 /* Check each device and virtqueue: flush blocked ones. */
1062 for (i = devices.dev; i; i = i->next) {
1063 for (vq = i->vq; vq; vq = vq->next) {
1064 if (!vq->blocked)
1065 continue;
1066
1067 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
1068 vq->blocked = false;
1069 if (vq->handle_output)
1070 vq->handle_output(fd, vq, true);
1071 }
1072 }
1073}
1074
1075/* This is called when the Waker wakes us up: check for incoming file
1076 * descriptors. */
1077static void handle_input(int fd)
1078{
1079 /* select() wants a zeroed timeval to mean "don't wait". */
1080 struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
1081
1082 for (;;) {
1083 struct device *i;
1084 fd_set fds = devices.infds;
1085 int num;
1086
1087 num = select(devices.max_infd+1, &fds, NULL, NULL, &poll);
1088 /* Could get interrupted */
1089 if (num < 0)
1090 continue;
1091 /* If nothing is ready, we're done. */
1092 if (num == 0)
1093 break;
1094
1095 /* Otherwise, call the device(s) which have readable file
1096 * descriptors and a method of handling them. */
1097 for (i = devices.dev; i; i = i->next) {
1098 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1099 if (i->handle_input(fd, i))
1100 continue;
1101
1102 /* If handle_input() returns false, it means we
1103 * should no longer service it. Networking and
1104 * console do this when there's no input
1105 * buffers to deliver into. Console also uses
1106 * it when it discovers that stdin is closed. */
1107 FD_CLR(i->fd, &devices.infds);
1108 }
1109 }
1110
1111 /* Is this the timeout fd? */
1112 if (FD_ISSET(timeoutpipe[0], &fds))
1113 handle_timeout(fd);
1114 }
1115}
1116
1117/*L:190 993/*L:190
1118 * Device Setup 994 * Device Setup
1119 * 995 *
@@ -1129,8 +1005,8 @@ static void handle_input(int fd)
1129static u8 *device_config(const struct device *dev) 1005static u8 *device_config(const struct device *dev)
1130{ 1006{
1131 return (void *)(dev->desc + 1) 1007 return (void *)(dev->desc + 1)
1132 + dev->desc->num_vq * sizeof(struct lguest_vqconfig) 1008 + dev->num_vq * sizeof(struct lguest_vqconfig)
1133 + dev->desc->feature_len * 2; 1009 + dev->feature_len * 2;
1134} 1010}
1135 1011
1136/* This routine allocates a new "struct lguest_device_desc" from descriptor 1012/* This routine allocates a new "struct lguest_device_desc" from descriptor
@@ -1159,7 +1035,7 @@ static struct lguest_device_desc *new_dev_desc(u16 type)
1159/* Each device descriptor is followed by the description of its virtqueues. We 1035/* Each device descriptor is followed by the description of its virtqueues. We
1160 * specify how many descriptors the virtqueue is to have. */ 1036 * specify how many descriptors the virtqueue is to have. */
1161static void add_virtqueue(struct device *dev, unsigned int num_descs, 1037static void add_virtqueue(struct device *dev, unsigned int num_descs,
1162 void (*handle_output)(int, struct virtqueue *, bool)) 1038 void (*service)(struct virtqueue *))
1163{ 1039{
1164 unsigned int pages; 1040 unsigned int pages;
1165 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1041 struct virtqueue **i, *vq = malloc(sizeof(*vq));
@@ -1174,8 +1050,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1174 vq->next = NULL; 1050 vq->next = NULL;
1175 vq->last_avail_idx = 0; 1051 vq->last_avail_idx = 0;
1176 vq->dev = dev; 1052 vq->dev = dev;
1177 vq->inflight = 0; 1053 vq->service = service;
1178 vq->blocked = false; 1054 vq->thread = (pid_t)-1;
1179 1055
1180 /* Initialize the configuration. */ 1056 /* Initialize the configuration. */
1181 vq->config.num = num_descs; 1057 vq->config.num = num_descs;
@@ -1191,6 +1067,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1191 * yet, otherwise we'd be overwriting them. */ 1067 * yet, otherwise we'd be overwriting them. */
1192 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); 1068 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
1193 memcpy(device_config(dev), &vq->config, sizeof(vq->config)); 1069 memcpy(device_config(dev), &vq->config, sizeof(vq->config));
1070 dev->num_vq++;
1194 dev->desc->num_vq++; 1071 dev->desc->num_vq++;
1195 1072
1196 verbose("Virtqueue page %#lx\n", to_guest_phys(p)); 1073 verbose("Virtqueue page %#lx\n", to_guest_phys(p));
@@ -1199,15 +1076,6 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1199 * second. */ 1076 * second. */
1200 for (i = &dev->vq; *i; i = &(*i)->next); 1077 for (i = &dev->vq; *i; i = &(*i)->next);
1201 *i = vq; 1078 *i = vq;
1202
1203 /* Set the routine to call when the Guest does something to this
1204 * virtqueue. */
1205 vq->handle_output = handle_output;
1206
1207 /* As an optimization, set the advisory "Don't Notify Me" flag if we
1208 * don't have a handler */
1209 if (!handle_output)
1210 vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
1211} 1079}
1212 1080
1213/* The first half of the feature bitmask is for us to advertise features. The 1081/* The first half of the feature bitmask is for us to advertise features. The
@@ -1219,7 +1087,7 @@ static void add_feature(struct device *dev, unsigned bit)
1219 /* We can't extend the feature bits once we've added config bytes */ 1087 /* We can't extend the feature bits once we've added config bytes */
1220 if (dev->desc->feature_len <= bit / CHAR_BIT) { 1088 if (dev->desc->feature_len <= bit / CHAR_BIT) {
1221 assert(dev->desc->config_len == 0); 1089 assert(dev->desc->config_len == 0);
1222 dev->desc->feature_len = (bit / CHAR_BIT) + 1; 1090 dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
1223 } 1091 }
1224 1092
1225 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); 1093 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
@@ -1243,22 +1111,17 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
1243 * calling new_dev_desc() to allocate the descriptor and device memory. 1111 * calling new_dev_desc() to allocate the descriptor and device memory.
1244 * 1112 *
1245 * See what I mean about userspace being boring? */ 1113 * See what I mean about userspace being boring? */
1246static struct device *new_device(const char *name, u16 type, int fd, 1114static struct device *new_device(const char *name, u16 type)
1247 bool (*handle_input)(int, struct device *))
1248{ 1115{
1249 struct device *dev = malloc(sizeof(*dev)); 1116 struct device *dev = malloc(sizeof(*dev));
1250 1117
1251 /* Now we populate the fields one at a time. */ 1118 /* Now we populate the fields one at a time. */
1252 dev->fd = fd;
1253 /* If we have an input handler for this file descriptor, then we add it
1254 * to the device_list's fdset and maxfd. */
1255 if (handle_input)
1256 add_device_fd(dev->fd);
1257 dev->desc = new_dev_desc(type); 1119 dev->desc = new_dev_desc(type);
1258 dev->handle_input = handle_input;
1259 dev->name = name; 1120 dev->name = name;
1260 dev->vq = NULL; 1121 dev->vq = NULL;
1261 dev->ready = NULL; 1122 dev->feature_len = 0;
1123 dev->num_vq = 0;
1124 dev->running = false;
1262 1125
1263 /* Append to device list. Prepending to a single-linked list is 1126 /* Append to device list. Prepending to a single-linked list is
1264 * easier, but the user expects the devices to be arranged on the bus 1127 * easier, but the user expects the devices to be arranged on the bus
@@ -1286,13 +1149,10 @@ static void setup_console(void)
1286 * raw input stream to the Guest. */ 1149 * raw input stream to the Guest. */
1287 term.c_lflag &= ~(ISIG|ICANON|ECHO); 1150 term.c_lflag &= ~(ISIG|ICANON|ECHO);
1288 tcsetattr(STDIN_FILENO, TCSANOW, &term); 1151 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1289 /* If we exit gracefully, the original settings will be
1290 * restored so the user can see what they're typing. */
1291 atexit(restore_term);
1292 } 1152 }
1293 1153
1294 dev = new_device("console", VIRTIO_ID_CONSOLE, 1154 dev = new_device("console", VIRTIO_ID_CONSOLE);
1295 STDIN_FILENO, handle_console_input); 1155
1296 /* We store the console state in dev->priv, and initialize it. */ 1156 /* We store the console state in dev->priv, and initialize it. */
1297 dev->priv = malloc(sizeof(struct console_abort)); 1157 dev->priv = malloc(sizeof(struct console_abort));
1298 ((struct console_abort *)dev->priv)->count = 0; 1158 ((struct console_abort *)dev->priv)->count = 0;
@@ -1301,31 +1161,13 @@ static void setup_console(void)
1301 * they put something the input queue, we make sure we're listening to 1161 * they put something the input queue, we make sure we're listening to
1302 * stdin. When they put something in the output queue, we write it to 1162 * stdin. When they put something in the output queue, we write it to
1303 * stdout. */ 1163 * stdout. */
1304 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); 1164 add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
1305 add_virtqueue(dev, VIRTQUEUE_NUM, handle_console_output); 1165 add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
1306 1166
1307 verbose("device %u: console\n", devices.device_num++); 1167 verbose("device %u: console\n", ++devices.device_num);
1308} 1168}
1309/*:*/ 1169/*:*/
1310 1170
1311static void timeout_alarm(int sig)
1312{
1313 write(timeoutpipe[1], "", 1);
1314}
1315
1316static void setup_timeout(void)
1317{
1318 if (pipe(timeoutpipe) != 0)
1319 err(1, "Creating timeout pipe");
1320
1321 if (fcntl(timeoutpipe[1], F_SETFL,
1322 fcntl(timeoutpipe[1], F_GETFL) | O_NONBLOCK) != 0)
1323 err(1, "Making timeout pipe nonblocking");
1324
1325 add_device_fd(timeoutpipe[0]);
1326 signal(SIGALRM, timeout_alarm);
1327}
1328
1329/*M:010 Inter-guest networking is an interesting area. Simplest is to have a 1171/*M:010 Inter-guest networking is an interesting area. Simplest is to have a
1330 * --sharenet=<name> option which opens or creates a named pipe. This can be 1172 * --sharenet=<name> option which opens or creates a named pipe. This can be
1331 * used to send packets to another guest in a 1:1 manner. 1173 * used to send packets to another guest in a 1:1 manner.
@@ -1447,21 +1289,23 @@ static int get_tun_device(char tapif[IFNAMSIZ])
1447static void setup_tun_net(char *arg) 1289static void setup_tun_net(char *arg)
1448{ 1290{
1449 struct device *dev; 1291 struct device *dev;
1450 int netfd, ipfd; 1292 struct net_info *net_info = malloc(sizeof(*net_info));
1293 int ipfd;
1451 u32 ip = INADDR_ANY; 1294 u32 ip = INADDR_ANY;
1452 bool bridging = false; 1295 bool bridging = false;
1453 char tapif[IFNAMSIZ], *p; 1296 char tapif[IFNAMSIZ], *p;
1454 struct virtio_net_config conf; 1297 struct virtio_net_config conf;
1455 1298
1456 netfd = get_tun_device(tapif); 1299 net_info->tunfd = get_tun_device(tapif);
1457 1300
1458 /* First we create a new network device. */ 1301 /* First we create a new network device. */
1459 dev = new_device("net", VIRTIO_ID_NET, netfd, handle_tun_input); 1302 dev = new_device("net", VIRTIO_ID_NET);
1303 dev->priv = net_info;
1460 1304
1461 /* Network devices need a receive and a send queue, just like 1305 /* Network devices need a receive and a send queue, just like
1462 * console. */ 1306 * console. */
1463 add_virtqueue(dev, VIRTQUEUE_NUM, net_enable_fd); 1307 add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
1464 add_virtqueue(dev, VIRTQUEUE_NUM, handle_net_output); 1308 add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
1465 1309
1466 /* We need a socket to perform the magic network ioctls to bring up the 1310 /* We need a socket to perform the magic network ioctls to bring up the
1467 * tap interface, connect to the bridge etc. Any socket will do! */ 1311 * tap interface, connect to the bridge etc. Any socket will do! */
@@ -1502,6 +1346,8 @@ static void setup_tun_net(char *arg)
1502 add_feature(dev, VIRTIO_NET_F_HOST_TSO4); 1346 add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
1503 add_feature(dev, VIRTIO_NET_F_HOST_TSO6); 1347 add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
1504 add_feature(dev, VIRTIO_NET_F_HOST_ECN); 1348 add_feature(dev, VIRTIO_NET_F_HOST_ECN);
1349 /* We handle indirect ring entries */
1350 add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
1505 set_config(dev, sizeof(conf), &conf); 1351 set_config(dev, sizeof(conf), &conf);
1506 1352
1507 /* We don't need the socket any more; setup is done. */ 1353 /* We don't need the socket any more; setup is done. */
@@ -1550,20 +1396,18 @@ struct vblk_info
1550 * Remember that the block device is handled by a separate I/O thread. We head 1396 * Remember that the block device is handled by a separate I/O thread. We head
1551 * straight into the core of that thread here: 1397 * straight into the core of that thread here:
1552 */ 1398 */
1553static bool service_io(struct device *dev) 1399static void blk_request(struct virtqueue *vq)
1554{ 1400{
1555 struct vblk_info *vblk = dev->priv; 1401 struct vblk_info *vblk = vq->dev->priv;
1556 unsigned int head, out_num, in_num, wlen; 1402 unsigned int head, out_num, in_num, wlen;
1557 int ret; 1403 int ret;
1558 u8 *in; 1404 u8 *in;
1559 struct virtio_blk_outhdr *out; 1405 struct virtio_blk_outhdr *out;
1560 struct iovec iov[dev->vq->vring.num]; 1406 struct iovec iov[vq->vring.num];
1561 off64_t off; 1407 off64_t off;
1562 1408
1563 /* See if there's a request waiting. If not, nothing to do. */ 1409 /* Get the next request. */
1564 head = get_vq_desc(dev->vq, iov, &out_num, &in_num); 1410 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1565 if (head == dev->vq->vring.num)
1566 return false;
1567 1411
1568 /* Every block request should contain at least one output buffer 1412 /* Every block request should contain at least one output buffer
1569 * (detailing the location on disk and the type of request) and one 1413 * (detailing the location on disk and the type of request) and one
@@ -1637,83 +1481,21 @@ static bool service_io(struct device *dev)
1637 if (out->type & VIRTIO_BLK_T_BARRIER) 1481 if (out->type & VIRTIO_BLK_T_BARRIER)
1638 fdatasync(vblk->fd); 1482 fdatasync(vblk->fd);
1639 1483
1640 /* We can't trigger an IRQ, because we're not the Launcher. It does 1484 add_used(vq, head, wlen);
1641 * that when we tell it we're done. */
1642 add_used(dev->vq, head, wlen);
1643 return true;
1644}
1645
1646/* This is the thread which actually services the I/O. */
1647static int io_thread(void *_dev)
1648{
1649 struct device *dev = _dev;
1650 struct vblk_info *vblk = dev->priv;
1651 char c;
1652
1653 /* Close other side of workpipe so we get 0 read when main dies. */
1654 close(vblk->workpipe[1]);
1655 /* Close the other side of the done_fd pipe. */
1656 close(dev->fd);
1657
1658 /* When this read fails, it means Launcher died, so we follow. */
1659 while (read(vblk->workpipe[0], &c, 1) == 1) {
1660 /* We acknowledge each request immediately to reduce latency,
1661 * rather than waiting until we've done them all. I haven't
1662 * measured to see if it makes any difference.
1663 *
1664 * That would be an interesting test, wouldn't it? You could
1665 * also try having more than one I/O thread. */
1666 while (service_io(dev))
1667 write(vblk->done_fd, &c, 1);
1668 }
1669 return 0;
1670}
1671
1672/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1673 * when that thread tells us it's completed some I/O. */
1674static bool handle_io_finish(int fd, struct device *dev)
1675{
1676 char c;
1677
1678 /* If the I/O thread died, presumably it printed the error, so we
1679 * simply exit. */
1680 if (read(dev->fd, &c, 1) != 1)
1681 exit(1);
1682
1683 /* It did some work, so trigger the irq. */
1684 trigger_irq(fd, dev->vq);
1685 return true;
1686}
1687
1688/* When the Guest submits some I/O, we just need to wake the I/O thread. */
1689static void handle_virtblk_output(int fd, struct virtqueue *vq, bool timeout)
1690{
1691 struct vblk_info *vblk = vq->dev->priv;
1692 char c = 0;
1693
1694 /* Wake up I/O thread and tell it to go to work! */
1695 if (write(vblk->workpipe[1], &c, 1) != 1)
1696 /* Presumably it indicated why it died. */
1697 exit(1);
1698} 1485}
1699 1486
1700/*L:198 This actually sets up a virtual block device. */ 1487/*L:198 This actually sets up a virtual block device. */
1701static void setup_block_file(const char *filename) 1488static void setup_block_file(const char *filename)
1702{ 1489{
1703 int p[2];
1704 struct device *dev; 1490 struct device *dev;
1705 struct vblk_info *vblk; 1491 struct vblk_info *vblk;
1706 void *stack;
1707 struct virtio_blk_config conf; 1492 struct virtio_blk_config conf;
1708 1493
1709 /* This is the pipe the I/O thread will use to tell us I/O is done. */
1710 pipe(p);
1711
1712 /* The device responds to return from I/O thread. */ 1494 /* The device responds to return from I/O thread. */
1713 dev = new_device("block", VIRTIO_ID_BLOCK, p[0], handle_io_finish); 1495 dev = new_device("block", VIRTIO_ID_BLOCK);
1714 1496
1715 /* The device has one virtqueue, where the Guest places requests. */ 1497 /* The device has one virtqueue, where the Guest places requests. */
1716 add_virtqueue(dev, VIRTQUEUE_NUM, handle_virtblk_output); 1498 add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
1717 1499
1718 /* Allocate the room for our own bookkeeping */ 1500 /* Allocate the room for our own bookkeeping */
1719 vblk = dev->priv = malloc(sizeof(*vblk)); 1501 vblk = dev->priv = malloc(sizeof(*vblk));
@@ -1735,49 +1517,29 @@ static void setup_block_file(const char *filename)
1735 1517
1736 set_config(dev, sizeof(conf), &conf); 1518 set_config(dev, sizeof(conf), &conf);
1737 1519
1738 /* The I/O thread writes to this end of the pipe when done. */
1739 vblk->done_fd = p[1];
1740
1741 /* This is the second pipe, which is how we tell the I/O thread about
1742 * more work. */
1743 pipe(vblk->workpipe);
1744
1745 /* Create stack for thread and run it. Since stack grows upwards, we
1746 * point the stack pointer to the end of this region. */
1747 stack = malloc(32768);
1748 /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
1749 * becoming a zombie. */
1750 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
1751 err(1, "Creating clone");
1752
1753 /* We don't need to keep the I/O thread's end of the pipes open. */
1754 close(vblk->done_fd);
1755 close(vblk->workpipe[0]);
1756
1757 verbose("device %u: virtblock %llu sectors\n", 1520 verbose("device %u: virtblock %llu sectors\n",
1758 devices.device_num, le64_to_cpu(conf.capacity)); 1521 ++devices.device_num, le64_to_cpu(conf.capacity));
1759} 1522}
1760 1523
1524struct rng_info {
1525 int rfd;
1526};
1527
1761/* Our random number generator device reads from /dev/random into the Guest's 1528/* Our random number generator device reads from /dev/random into the Guest's
1762 * input buffers. The usual case is that the Guest doesn't want random numbers 1529 * input buffers. The usual case is that the Guest doesn't want random numbers
1763 * and so has no buffers although /dev/random is still readable, whereas 1530 * and so has no buffers although /dev/random is still readable, whereas
1764 * console is the reverse. 1531 * console is the reverse.
1765 * 1532 *
1766 * The same logic applies, however. */ 1533 * The same logic applies, however. */
1767static bool handle_rng_input(int fd, struct device *dev) 1534static void rng_input(struct virtqueue *vq)
1768{ 1535{
1769 int len; 1536 int len;
1770 unsigned int head, in_num, out_num, totlen = 0; 1537 unsigned int head, in_num, out_num, totlen = 0;
1771 struct iovec iov[dev->vq->vring.num]; 1538 struct rng_info *rng_info = vq->dev->priv;
1539 struct iovec iov[vq->vring.num];
1772 1540
1773 /* First we need a buffer from the Guests's virtqueue. */ 1541 /* First we need a buffer from the Guests's virtqueue. */
1774 head = get_vq_desc(dev->vq, iov, &out_num, &in_num); 1542 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1775
1776 /* If they're not ready for input, stop listening to this file
1777 * descriptor. We'll start again once they add an input buffer. */
1778 if (head == dev->vq->vring.num)
1779 return false;
1780
1781 if (out_num) 1543 if (out_num)
1782 errx(1, "Output buffers in rng?"); 1544 errx(1, "Output buffers in rng?");
1783 1545
@@ -1785,7 +1547,7 @@ static bool handle_rng_input(int fd, struct device *dev)
1785 * it reads straight into the Guest's buffer. We loop to make sure we 1547 * it reads straight into the Guest's buffer. We loop to make sure we
1786 * fill it. */ 1548 * fill it. */
1787 while (!iov_empty(iov, in_num)) { 1549 while (!iov_empty(iov, in_num)) {
1788 len = readv(dev->fd, iov, in_num); 1550 len = readv(rng_info->rfd, iov, in_num);
1789 if (len <= 0) 1551 if (len <= 0)
1790 err(1, "Read from /dev/random gave %i", len); 1552 err(1, "Read from /dev/random gave %i", len);
1791 iov_consume(iov, in_num, len); 1553 iov_consume(iov, in_num, len);
@@ -1793,25 +1555,23 @@ static bool handle_rng_input(int fd, struct device *dev)
1793 } 1555 }
1794 1556
1795 /* Tell the Guest about the new input. */ 1557 /* Tell the Guest about the new input. */
1796 add_used_and_trigger(fd, dev->vq, head, totlen); 1558 add_used(vq, head, totlen);
1797
1798 /* Everything went OK! */
1799 return true;
1800} 1559}
1801 1560
1802/* And this creates a "hardware" random number device for the Guest. */ 1561/* And this creates a "hardware" random number device for the Guest. */
1803static void setup_rng(void) 1562static void setup_rng(void)
1804{ 1563{
1805 struct device *dev; 1564 struct device *dev;
1806 int fd; 1565 struct rng_info *rng_info = malloc(sizeof(*rng_info));
1807 1566
1808 fd = open_or_die("/dev/random", O_RDONLY); 1567 rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
1809 1568
1810 /* The device responds to return from I/O thread. */ 1569 /* The device responds to return from I/O thread. */
1811 dev = new_device("rng", VIRTIO_ID_RNG, fd, handle_rng_input); 1570 dev = new_device("rng", VIRTIO_ID_RNG);
1571 dev->priv = rng_info;
1812 1572
1813 /* The device has one virtqueue, where the Guest places inbufs. */ 1573 /* The device has one virtqueue, where the Guest places inbufs. */
1814 add_virtqueue(dev, VIRTQUEUE_NUM, enable_fd); 1574 add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
1815 1575
1816 verbose("device %u: rng\n", devices.device_num++); 1576 verbose("device %u: rng\n", devices.device_num++);
1817} 1577}
@@ -1827,17 +1587,18 @@ static void __attribute__((noreturn)) restart_guest(void)
1827 for (i = 3; i < FD_SETSIZE; i++) 1587 for (i = 3; i < FD_SETSIZE; i++)
1828 close(i); 1588 close(i);
1829 1589
1830 /* The exec automatically gets rid of the I/O and Waker threads. */ 1590 /* Reset all the devices (kills all threads). */
1591 cleanup_devices();
1592
1831 execv(main_args[0], main_args); 1593 execv(main_args[0], main_args);
1832 err(1, "Could not exec %s", main_args[0]); 1594 err(1, "Could not exec %s", main_args[0]);
1833} 1595}
1834 1596
1835/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves 1597/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
1836 * its input and output, and finally, lays it to rest. */ 1598 * its input and output, and finally, lays it to rest. */
1837static void __attribute__((noreturn)) run_guest(int lguest_fd) 1599static void __attribute__((noreturn)) run_guest(void)
1838{ 1600{
1839 for (;;) { 1601 for (;;) {
1840 unsigned long args[] = { LHREQ_BREAK, 0 };
1841 unsigned long notify_addr; 1602 unsigned long notify_addr;
1842 int readval; 1603 int readval;
1843 1604
@@ -1848,8 +1609,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1848 /* One unsigned long means the Guest did HCALL_NOTIFY */ 1609 /* One unsigned long means the Guest did HCALL_NOTIFY */
1849 if (readval == sizeof(notify_addr)) { 1610 if (readval == sizeof(notify_addr)) {
1850 verbose("Notify on address %#lx\n", notify_addr); 1611 verbose("Notify on address %#lx\n", notify_addr);
1851 handle_output(lguest_fd, notify_addr); 1612 handle_output(notify_addr);
1852 continue;
1853 /* ENOENT means the Guest died. Reading tells us why. */ 1613 /* ENOENT means the Guest died. Reading tells us why. */
1854 } else if (errno == ENOENT) { 1614 } else if (errno == ENOENT) {
1855 char reason[1024] = { 0 }; 1615 char reason[1024] = { 0 };
@@ -1858,19 +1618,9 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1858 /* ERESTART means that we need to reboot the guest */ 1618 /* ERESTART means that we need to reboot the guest */
1859 } else if (errno == ERESTART) { 1619 } else if (errno == ERESTART) {
1860 restart_guest(); 1620 restart_guest();
1861 /* EAGAIN means a signal (timeout). 1621 /* Anything else means a bug or incompatible change. */
1862 * Anything else means a bug or incompatible change. */ 1622 } else
1863 } else if (errno != EAGAIN)
1864 err(1, "Running guest failed"); 1623 err(1, "Running guest failed");
1865
1866 /* Only service input on thread for CPU 0. */
1867 if (cpu_id != 0)
1868 continue;
1869
1870 /* Service input, then unset the BREAK to release the Waker. */
1871 handle_input(lguest_fd);
1872 if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
1873 err(1, "Resetting break");
1874 } 1624 }
1875} 1625}
1876/*L:240 1626/*L:240
@@ -1904,8 +1654,8 @@ int main(int argc, char *argv[])
1904 /* Memory, top-level pagetable, code startpoint and size of the 1654 /* Memory, top-level pagetable, code startpoint and size of the
1905 * (optional) initrd. */ 1655 * (optional) initrd. */
1906 unsigned long mem = 0, start, initrd_size = 0; 1656 unsigned long mem = 0, start, initrd_size = 0;
1907 /* Two temporaries and the /dev/lguest file descriptor. */ 1657 /* Two temporaries. */
1908 int i, c, lguest_fd; 1658 int i, c;
1909 /* The boot information for the Guest. */ 1659 /* The boot information for the Guest. */
1910 struct boot_params *boot; 1660 struct boot_params *boot;
1911 /* If they specify an initrd file to load. */ 1661 /* If they specify an initrd file to load. */
@@ -1913,18 +1663,10 @@ int main(int argc, char *argv[])
1913 1663
1914 /* Save the args: we "reboot" by execing ourselves again. */ 1664 /* Save the args: we "reboot" by execing ourselves again. */
1915 main_args = argv; 1665 main_args = argv;
1916 /* We don't "wait" for the children, so prevent them from becoming
1917 * zombies. */
1918 signal(SIGCHLD, SIG_IGN);
1919 1666
1920 /* First we initialize the device list. Since console and network 1667 /* First we initialize the device list. We keep a pointer to the last
1921 * device receive input from a file descriptor, we keep an fdset 1668 * device, and the next interrupt number to use for devices (1:
1922 * (infds) and the maximum fd number (max_infd) with the head of the 1669 * remember that 0 is used by the timer). */
1923 * list. We also keep a pointer to the last device. Finally, we keep
1924 * the next interrupt number to use for devices (1: remember that 0 is
1925 * used by the timer). */
1926 FD_ZERO(&devices.infds);
1927 devices.max_infd = -1;
1928 devices.lastdev = NULL; 1670 devices.lastdev = NULL;
1929 devices.next_irq = 1; 1671 devices.next_irq = 1;
1930 1672
@@ -1982,9 +1724,6 @@ int main(int argc, char *argv[])
1982 /* We always have a console device */ 1724 /* We always have a console device */
1983 setup_console(); 1725 setup_console();
1984 1726
1985 /* We can timeout waiting for Guest network transmit. */
1986 setup_timeout();
1987
1988 /* Now we load the kernel */ 1727 /* Now we load the kernel */
1989 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); 1728 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
1990 1729
@@ -2023,15 +1762,16 @@ int main(int argc, char *argv[])
2023 1762
2024 /* We tell the kernel to initialize the Guest: this returns the open 1763 /* We tell the kernel to initialize the Guest: this returns the open
2025 * /dev/lguest file descriptor. */ 1764 * /dev/lguest file descriptor. */
2026 lguest_fd = tell_kernel(start); 1765 tell_kernel(start);
1766
1767 /* Ensure that we terminate if a child dies. */
1768 signal(SIGCHLD, kill_launcher);
2027 1769
2028 /* We clone off a thread, which wakes the Launcher whenever one of the 1770 /* If we exit via err(), this kills all the threads, restores tty. */
2029 * input file descriptors needs attention. We call this the Waker, and 1771 atexit(cleanup_devices);
2030 * we'll cover it in a moment. */
2031 setup_waker(lguest_fd);
2032 1772
2033 /* Finally, run the Guest. This doesn't return. */ 1773 /* Finally, run the Guest. This doesn't return. */
2034 run_guest(lguest_fd); 1774 run_guest();
2035} 1775}
2036/*:*/ 1776/*:*/
2037 1777
diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt
index 28c747362f95..efb3a6a045a2 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -37,7 +37,6 @@ Running Lguest:
37 "Paravirtualized guest support" = Y 37 "Paravirtualized guest support" = Y
38 "Lguest guest support" = Y 38 "Lguest guest support" = Y
39 "High Memory Support" = off/4GB 39 "High Memory Support" = off/4GB
40 "PAE (Physical Address Extension) Support" = N
41 "Alignment value to which kernel should be aligned" = 0x100000 40 "Alignment value to which kernel should be aligned" = 0x100000
42 (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and 41 (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
43 CONFIG_PHYSICAL_ALIGN=0x100000) 42 CONFIG_PHYSICAL_ALIGN=0x100000)
diff --git a/Documentation/local_ops.txt b/Documentation/local_ops.txt
index 23045b8b50f0..300da4bdfdbd 100644
--- a/Documentation/local_ops.txt
+++ b/Documentation/local_ops.txt
@@ -34,7 +34,7 @@ out of order wrt other memory writes by the owner CPU.
34 34
35It can be done by slightly modifying the standard atomic operations : only 35It can be done by slightly modifying the standard atomic operations : only
36their UP variant must be kept. It typically means removing LOCK prefix (on 36their UP variant must be kept. It typically means removing LOCK prefix (on
37i386 and x86_64) and any SMP sychronization barrier. If the architecture does 37i386 and x86_64) and any SMP synchronization barrier. If the architecture does
38not have a different behavior between SMP and UP, including asm-generic/local.h 38not have a different behavior between SMP and UP, including asm-generic/local.h
39in your architecture's local.h is sufficient. 39in your architecture's local.h is sufficient.
40 40
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f5b7127f54ac..7f5809eddee6 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -31,6 +31,7 @@ Contents:
31 31
32 - Locking functions. 32 - Locking functions.
33 - Interrupt disabling functions. 33 - Interrupt disabling functions.
34 - Sleep and wake-up functions.
34 - Miscellaneous functions. 35 - Miscellaneous functions.
35 36
36 (*) Inter-CPU locking barrier effects. 37 (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
1217other means. 1218other means.
1218 1219
1219 1220
1221SLEEP AND WAKE-UP FUNCTIONS
1222---------------------------
1223
1224Sleeping and waking on an event flagged in global data can be viewed as an
1225interaction between two pieces of data: the task state of the task waiting for
1226the event and the global data used to indicate the event. To make sure that
1227these appear to happen in the right order, the primitives to begin the process
1228of going to sleep, and the primitives to initiate a wake up imply certain
1229barriers.
1230
1231Firstly, the sleeper normally follows something like this sequence of events:
1232
1233 for (;;) {
1234 set_current_state(TASK_UNINTERRUPTIBLE);
1235 if (event_indicated)
1236 break;
1237 schedule();
1238 }
1239
1240A general memory barrier is interpolated automatically by set_current_state()
1241after it has altered the task state:
1242
1243 CPU 1
1244 ===============================
1245 set_current_state();
1246 set_mb();
1247 STORE current->state
1248 <general barrier>
1249 LOAD event_indicated
1250
1251set_current_state() may be wrapped by:
1252
1253 prepare_to_wait();
1254 prepare_to_wait_exclusive();
1255
1256which therefore also imply a general memory barrier after setting the state.
1257The whole sequence above is available in various canned forms, all of which
1258interpolate the memory barrier in the right place:
1259
1260 wait_event();
1261 wait_event_interruptible();
1262 wait_event_interruptible_exclusive();
1263 wait_event_interruptible_timeout();
1264 wait_event_killable();
1265 wait_event_timeout();
1266 wait_on_bit();
1267 wait_on_bit_lock();
1268
1269
1270Secondly, code that performs a wake up normally follows something like this:
1271
1272 event_indicated = 1;
1273 wake_up(&event_wait_queue);
1274
1275or:
1276
1277 event_indicated = 1;
1278 wake_up_process(event_daemon);
1279
1280A write memory barrier is implied by wake_up() and co. if and only if they wake
1281something up. The barrier occurs before the task state is cleared, and so sits
1282between the STORE to indicate the event and the STORE to set TASK_RUNNING:
1283
1284 CPU 1 CPU 2
1285 =============================== ===============================
1286 set_current_state(); STORE event_indicated
1287 set_mb(); wake_up();
1288 STORE current->state <write barrier>
1289 <general barrier> STORE current->state
1290 LOAD event_indicated
1291
1292The available waker functions include:
1293
1294 complete();
1295 wake_up();
1296 wake_up_all();
1297 wake_up_bit();
1298 wake_up_interruptible();
1299 wake_up_interruptible_all();
1300 wake_up_interruptible_nr();
1301 wake_up_interruptible_poll();
1302 wake_up_interruptible_sync();
1303 wake_up_interruptible_sync_poll();
1304 wake_up_locked();
1305 wake_up_locked_poll();
1306 wake_up_nr();
1307 wake_up_poll();
1308 wake_up_process();
1309
1310
1311[!] Note that the memory barriers implied by the sleeper and the waker do _not_
1312order multiple stores before the wake-up with respect to loads of those stored
1313values after the sleeper has called set_current_state(). For instance, if the
1314sleeper does:
1315
1316 set_current_state(TASK_INTERRUPTIBLE);
1317 if (event_indicated)
1318 break;
1319 __set_current_state(TASK_RUNNING);
1320 do_something(my_data);
1321
1322and the waker does:
1323
1324 my_data = value;
1325 event_indicated = 1;
1326 wake_up(&event_wait_queue);
1327
1328there's no guarantee that the change to event_indicated will be perceived by
1329the sleeper as coming after the change to my_data. In such a circumstance, the
1330code on both sides must interpolate its own memory barriers between the
1331separate data accesses. Thus the above sleeper ought to do:
1332
1333 set_current_state(TASK_INTERRUPTIBLE);
1334 if (event_indicated) {
1335 smp_rmb();
1336 do_something(my_data);
1337 }
1338
1339and the waker should do:
1340
1341 my_data = value;
1342 smp_wmb();
1343 event_indicated = 1;
1344 wake_up(&event_wait_queue);
1345
1346
1220MISCELLANEOUS FUNCTIONS 1347MISCELLANEOUS FUNCTIONS
1221----------------------- 1348-----------------------
1222 1349
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
1366 1493
1367Under normal operation, memory operation reordering is generally not going to 1494Under normal operation, memory operation reordering is generally not going to
1368be a problem as a single-threaded linear piece of code will still appear to 1495be a problem as a single-threaded linear piece of code will still appear to
1369work correctly, even if it's in an SMP kernel. There are, however, three 1496work correctly, even if it's in an SMP kernel. There are, however, four
1370circumstances in which reordering definitely _could_ be a problem: 1497circumstances in which reordering definitely _could_ be a problem:
1371 1498
1372 (*) Interprocessor interaction. 1499 (*) Interprocessor interaction.
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 4c2ecf537a4a..bbc8a6a36921 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -73,13 +73,13 @@ this phase is triggered automatically. ACPI can notify this event. If not,
73(see Section 4.). 73(see Section 4.).
74 74
75Logical Memory Hotplug phase is to change memory state into 75Logical Memory Hotplug phase is to change memory state into
76avaiable/unavailable for users. Amount of memory from user's view is 76available/unavailable for users. Amount of memory from user's view is
77changed by this phase. The kernel makes all memory in it as free pages 77changed by this phase. The kernel makes all memory in it as free pages
78when a memory range is available. 78when a memory range is available.
79 79
80In this document, this phase is described as online/offline. 80In this document, this phase is described as online/offline.
81 81
82Logical Memory Hotplug phase is triggred by write of sysfs file by system 82Logical Memory Hotplug phase is triggered by write of sysfs file by system
83administrator. For the hot-add case, it must be executed after Physical Hotplug 83administrator. For the hot-add case, it must be executed after Physical Hotplug
84phase by hand. 84phase by hand.
85(However, if you writes udev's hotplug scripts for memory hotplug, these 85(However, if you writes udev's hotplug scripts for memory hotplug, these
@@ -334,7 +334,7 @@ MEMORY_CANCEL_ONLINE
334 Generated if MEMORY_GOING_ONLINE fails. 334 Generated if MEMORY_GOING_ONLINE fails.
335 335
336MEMORY_ONLINE 336MEMORY_ONLINE
337 Generated when memory has succesfully brought online. The callback may 337 Generated when memory has successfully brought online. The callback may
338 allocate pages from the new memory. 338 allocate pages from the new memory.
339 339
340MEMORY_GOING_OFFLINE 340MEMORY_GOING_OFFLINE
@@ -359,7 +359,7 @@ The third argument is passed by pointer of struct memory_notify.
359struct memory_notify { 359struct memory_notify {
360 unsigned long start_pfn; 360 unsigned long start_pfn;
361 unsigned long nr_pages; 361 unsigned long nr_pages;
362 int status_cahnge_nid; 362 int status_change_nid;
363} 363}
364 364
365start_pfn is start_pfn of online/offline memory. 365start_pfn is start_pfn of online/offline memory.
diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt
index 1fef1f06dfd2..d3507bad428d 100644
--- a/Documentation/mn10300/ABI.txt
+++ b/Documentation/mn10300/ABI.txt
@@ -26,7 +26,7 @@ registers and the stack. If the first argument is a 64-bit value, it will be
26passed in D0:D1. If the first argument is not a 64-bit value, but the second 26passed in D0:D1. If the first argument is not a 64-bit value, but the second
27is, the second will be passed entirely on the stack and D1 will be unused. 27is, the second will be passed entirely on the stack and D1 will be unused.
28 28
29Arguments smaller than 32-bits are not coelesced within a register or a stack 29Arguments smaller than 32-bits are not coalesced within a register or a stack
30word. For example, two byte-sized arguments will always be passed in separate 30word. For example, two byte-sized arguments will always be passed in separate
31registers or word-sized stack slots. 31registers or word-sized stack slots.
32 32
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index bdf93b7f0f24..274821b35a7f 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -50,7 +50,7 @@ byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
50 cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4 50 cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
51 51
52This figure represents a sector of 256 bytes. 52This figure represents a sector of 256 bytes.
53cp is my abbreviaton for column parity, rp for row parity. 53cp is my abbreviation for column parity, rp for row parity.
54 54
55Let's start to explain column parity. 55Let's start to explain column parity.
56cp0 is the parity that belongs to all bit0, bit2, bit4, bit6. 56cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
@@ -560,7 +560,7 @@ Measuring this code again showed big gain. When executing the original
560linux code 1 million times, this took about 1 second on my system. 560linux code 1 million times, this took about 1 second on my system.
561(using time to measure the performance). After this iteration I was back 561(using time to measure the performance). After this iteration I was back
562to 0.075 sec. Actually I had to decide to start measuring over 10 562to 0.075 sec. Actually I had to decide to start measuring over 10
563million interations in order not to loose too much accuracy. This one 563million iterations in order not to lose too much accuracy. This one
564definitely seemed to be the jackpot! 564definitely seemed to be the jackpot!
565 565
566There is a little bit more room for improvement though. There are three 566There is a little bit more room for improvement though. There are three
@@ -571,8 +571,8 @@ loop; This eliminates 3 statements per loop. Of course after the loop we
571need to correct by adding: 571need to correct by adding:
572 rp4 ^= rp4_6; 572 rp4 ^= rp4_6;
573 rp6 ^= rp4_6 573 rp6 ^= rp4_6
574Furthermore there are 4 sequential assingments to rp8. This can be 574Furthermore there are 4 sequential assignments to rp8. This can be
575encoded slightly more efficient by saving tmppar before those 4 lines 575encoded slightly more efficiently by saving tmppar before those 4 lines
576and later do rp8 = rp8 ^ tmppar ^ notrp8; 576and later do rp8 = rp8 ^ tmppar ^ notrp8;
577(where notrp8 is the value of rp8 before those 4 lines). 577(where notrp8 is the value of rp8 before those 4 lines).
578Again a use of the commutative property of xor. 578Again a use of the commutative property of xor.
@@ -622,7 +622,7 @@ Not a big change, but every penny counts :-)
622Analysis 7 622Analysis 7
623========== 623==========
624 624
625Acutally this made things worse. Not very much, but I don't want to move 625Actually this made things worse. Not very much, but I don't want to move
626into the wrong direction. Maybe something to investigate later. Could 626into the wrong direction. Maybe something to investigate later. Could
627have to do with caching again. 627have to do with caching again.
628 628
@@ -642,7 +642,7 @@ Analysis 8
642This makes things worse. Let's stick with attempt 6 and continue from there. 642This makes things worse. Let's stick with attempt 6 and continue from there.
643Although it seems that the code within the loop cannot be optimised 643Although it seems that the code within the loop cannot be optimised
644further there is still room to optimize the generation of the ecc codes. 644further there is still room to optimize the generation of the ecc codes.
645We can simply calcualate the total parity. If this is 0 then rp4 = rp5 645We can simply calculate the total parity. If this is 0 then rp4 = rp5
646etc. If the parity is 1, then rp4 = !rp5; 646etc. If the parity is 1, then rp4 = !rp5;
647But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits 647But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
648in the result byte and then do something like 648in the result byte and then do something like
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 08762750f121..d5181ce9ff62 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -221,7 +221,7 @@ ad_select
221 221
222 - Any slave's 802.3ad association state changes 222 - Any slave's 802.3ad association state changes
223 223
224 - The bond's adminstrative state changes to up 224 - The bond's administrative state changes to up
225 225
226 count or 2 226 count or 2
227 227
@@ -369,7 +369,7 @@ fail_over_mac
369 When this policy is used in conjuction with the mii 369 When this policy is used in conjuction with the mii
370 monitor, devices which assert link up prior to being 370 monitor, devices which assert link up prior to being
371 able to actually transmit and receive are particularly 371 able to actually transmit and receive are particularly
372 susecptible to loss of the gratuitous ARP, and an 372 susceptible to loss of the gratuitous ARP, and an
373 appropriate updelay setting may be required. 373 appropriate updelay setting may be required.
374 374
375 follow or 2 375 follow or 2
@@ -1794,7 +1794,7 @@ target to query.
1794generally referred to as "trunk failover." This is a feature of the 1794generally referred to as "trunk failover." This is a feature of the
1795switch that causes the link state of a particular switch port to be set 1795switch that causes the link state of a particular switch port to be set
1796down (or up) when the state of another switch port goes down (or up). 1796down (or up) when the state of another switch port goes down (or up).
1797It's purpose is to propogate link failures from logically "exterior" ports 1797Its purpose is to propagate link failures from logically "exterior" ports
1798to the logically "interior" ports that bonding is able to monitor via 1798to the logically "interior" ports that bonding is able to monitor via
1799miimon. Availability and configuration for trunk failover varies by 1799miimon. Availability and configuration for trunk failover varies by
1800switch, but this can be a viable alternative to the ARP monitor when using 1800switch, but this can be a viable alternative to the ARP monitor when using
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 2035bc4932f2..cd79735013f9 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -36,10 +36,15 @@ This file contains
36 6.2 local loopback of sent frames 36 6.2 local loopback of sent frames
37 6.3 CAN controller hardware filters 37 6.3 CAN controller hardware filters
38 6.4 The virtual CAN driver (vcan) 38 6.4 The virtual CAN driver (vcan)
39 6.5 currently supported CAN hardware 39 6.5 The CAN network device driver interface
40 6.6 todo 40 6.5.1 Netlink interface to set/get devices properties
41 6.5.2 Setting the CAN bit-timing
42 6.5.3 Starting and stopping the CAN network device
43 6.6 supported CAN hardware
41 44
42 7 Credits 45 7 Socket CAN resources
46
47 8 Credits
43 48
44============================================================================ 49============================================================================
45 50
@@ -234,6 +239,8 @@ solution for a couple of reasons:
234 the user application using the common CAN filter mechanisms. Inside 239 the user application using the common CAN filter mechanisms. Inside
235 this filter definition the (interested) type of errors may be 240 this filter definition the (interested) type of errors may be
236 selected. The reception of error frames is disabled by default. 241 selected. The reception of error frames is disabled by default.
242 The format of the CAN error frame is briefly decribed in the Linux
243 header file "include/linux/can/error.h".
237 244
2384. How to use Socket CAN 2454. How to use Socket CAN
239------------------------ 246------------------------
@@ -327,7 +334,7 @@ solution for a couple of reasons:
327 return 1; 334 return 1;
328 } 335 }
329 336
330 /* paraniod check ... */ 337 /* paranoid check ... */
331 if (nbytes < sizeof(struct can_frame)) { 338 if (nbytes < sizeof(struct can_frame)) {
332 fprintf(stderr, "read: incomplete CAN frame\n"); 339 fprintf(stderr, "read: incomplete CAN frame\n");
333 return 1; 340 return 1;
@@ -605,61 +612,213 @@ solution for a couple of reasons:
605 removal of vcan network devices can be managed with the ip(8) tool: 612 removal of vcan network devices can be managed with the ip(8) tool:
606 613
607 - Create a virtual CAN network interface: 614 - Create a virtual CAN network interface:
608 ip link add type vcan 615 $ ip link add type vcan
609 616
610 - Create a virtual CAN network interface with a specific name 'vcan42': 617 - Create a virtual CAN network interface with a specific name 'vcan42':
611 ip link add dev vcan42 type vcan 618 $ ip link add dev vcan42 type vcan
612 619
613 - Remove a (virtual CAN) network interface 'vcan42': 620 - Remove a (virtual CAN) network interface 'vcan42':
614 ip link del vcan42 621 $ ip link del vcan42
615 622
616 The tool 'vcan' from the SocketCAN SVN repository on BerliOS is obsolete. 623 6.5 The CAN network device driver interface
617 624
618 Virtual CAN network device creation in older Kernels: 625 The CAN network device driver interface provides a generic interface
619 In Linux Kernel versions < 2.6.24 the vcan driver creates 4 vcan 626 to setup, configure and monitor CAN network devices. The user can then
620 netdevices at module load time by default. This value can be changed 627 configure the CAN device, like setting the bit-timing parameters, via
621 with the module parameter 'numdev'. E.g. 'modprobe vcan numdev=8' 628 the netlink interface using the program "ip" from the "IPROUTE2"
622 629 utility suite. The following chapter describes briefly how to use it.
623 6.5 currently supported CAN hardware 630 Furthermore, the interface uses a common data structure and exports a
631 set of common functions, which all real CAN network device drivers
632 should use. Please have a look to the SJA1000 or MSCAN driver to
633 understand how to use them. The name of the module is can-dev.ko.
634
635 6.5.1 Netlink interface to set/get devices properties
636
637 The CAN device must be configured via netlink interface. The supported
638 netlink message types are defined and briefly described in
639 "include/linux/can/netlink.h". CAN link support for the program "ip"
640 of the IPROUTE2 utility suite is avaiable and it can be used as shown
641 below:
642
643 - Setting CAN device properties:
644
645 $ ip link set can0 type can help
646 Usage: ip link set DEVICE type can
647 [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
648 [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
649 phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
650
651 [ loopback { on | off } ]
652 [ listen-only { on | off } ]
653 [ triple-sampling { on | off } ]
654
655 [ restart-ms TIME-MS ]
656 [ restart ]
657
658 Where: BITRATE := { 1..1000000 }
659 SAMPLE-POINT := { 0.000..0.999 }
660 TQ := { NUMBER }
661 PROP-SEG := { 1..8 }
662 PHASE-SEG1 := { 1..8 }
663 PHASE-SEG2 := { 1..8 }
664 SJW := { 1..4 }
665 RESTART-MS := { 0 | NUMBER }
666
667 - Display CAN device details and statistics:
668
669 $ ip -details -statistics link show can0
670 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
671 link/can
672 can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
673 bitrate 125000 sample_point 0.875
674 tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
675 sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
676 clock 8000000
677 re-started bus-errors arbit-lost error-warn error-pass bus-off
678 41 17457 0 41 42 41
679 RX: bytes packets errors dropped overrun mcast
680 140859 17608 17457 0 0 0
681 TX: bytes packets errors dropped carrier collsns
682 861 112 0 41 0 0
683
684 More info to the above output:
685
686 "<TRIPLE-SAMPLING>"
687 Shows the list of selected CAN controller modes: LOOPBACK,
688 LISTEN-ONLY, or TRIPLE-SAMPLING.
689
690 "state ERROR-ACTIVE"
691 The current state of the CAN controller: "ERROR-ACTIVE",
692 "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
693
694 "restart-ms 100"
695 Automatic restart delay time. If set to a non-zero value, a
696 restart of the CAN controller will be triggered automatically
697 in case of a bus-off condition after the specified delay time
698 in milliseconds. By default it's off.
699
700 "bitrate 125000 sample_point 0.875"
701 Shows the real bit-rate in bits/sec and the sample-point in the
702 range 0.000..0.999. If the calculation of bit-timing parameters
703 is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
704 bit-timing can be defined by setting the "bitrate" argument.
705 Optionally the "sample-point" can be specified. By default it's
706 0.000 assuming CIA-recommended sample-points.
707
708 "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
709 Shows the time quanta in ns, propagation segment, phase buffer
710 segment 1 and 2 and the synchronisation jump width in units of
711 tq. They allow to define the CAN bit-timing in a hardware
712 independent format as proposed by the Bosch CAN 2.0 spec (see
713 chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
714
715 "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
716 clock 8000000"
717 Shows the bit-timing constants of the CAN controller, here the
718 "sja1000". The minimum and maximum values of the time segment 1
719 and 2, the synchronisation jump width in units of tq, the
720 bitrate pre-scaler and the CAN system clock frequency in Hz.
721 These constants could be used for user-defined (non-standard)
722 bit-timing calculation algorithms in user-space.
723
724 "re-started bus-errors arbit-lost error-warn error-pass bus-off"
725 Shows the number of restarts, bus and arbitration lost errors,
726 and the state changes to the error-warning, error-passive and
727 bus-off state. RX overrun errors are listed in the "overrun"
728 field of the standard network statistics.
729
730 6.5.2 Setting the CAN bit-timing
731
732 The CAN bit-timing parameters can always be defined in a hardware
733 independent format as proposed in the Bosch CAN 2.0 specification
734 specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
735 and "sjw":
736
737 $ ip link set canX type can tq 125 prop-seg 6 \
738 phase-seg1 7 phase-seg2 2 sjw 1
739
740 If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
741 recommended CAN bit-timing parameters will be calculated if the bit-
742 rate is specified with the argument "bitrate":
743
744 $ ip link set canX type can bitrate 125000
745
746 Note that this works fine for the most common CAN controllers with
747 standard bit-rates but may *fail* for exotic bit-rates or CAN system
748 clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
749 space and allows user-space tools to solely determine and set the
750 bit-timing parameters. The CAN controller specific bit-timing
751 constants can be used for that purpose. They are listed by the
752 following command:
753
754 $ ip -details link show can0
755 ...
756 sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
757
758 6.5.3 Starting and stopping the CAN network device
759
760 A CAN network device is started or stopped as usual with the command
761 "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
762 you *must* define proper bit-timing parameters for real CAN devices
763 before you can start it to avoid error-prone default settings:
764
765 $ ip link set canX up type can bitrate 125000
766
767 A device may enter the "bus-off" state if too much errors occurred on
768 the CAN bus. Then no more messages are received or sent. An automatic
769 bus-off recovery can be enabled by setting the "restart-ms" to a
770 non-zero value, e.g.:
771
772 $ ip link set canX type can restart-ms 100
773
774 Alternatively, the application may realize the "bus-off" condition
775 by monitoring CAN error frames and do a restart when appropriate with
776 the command:
777
778 $ ip link set canX type can restart
779
780 Note that a restart will also create a CAN error frame (see also
781 chapter 3.4).
624 782
625 On the project website http://developer.berlios.de/projects/socketcan 783 6.6 Supported CAN hardware
626 there are different drivers available:
627 784
628 vcan: Virtual CAN interface driver (if no real hardware is available) 785 Please check the "Kconfig" file in "drivers/net/can" to get an actual
629 sja1000: Philips SJA1000 CAN controller (recommended) 786 list of the support CAN hardware. On the Socket CAN project website
630 i82527: Intel i82527 CAN controller 787 (see chapter 7) there might be further drivers available, also for
631 mscan: Motorola/Freescale CAN controller (e.g. inside SOC MPC5200) 788 older kernel versions.
632 ccan: CCAN controller core (e.g. inside SOC h7202)
633 slcan: For a bunch of CAN adaptors that are attached via a
634 serial line ASCII protocol (for serial / USB adaptors)
635 789
636 Additionally the different CAN adaptors (ISA/PCI/PCMCIA/USB/Parport) 7907. Socket CAN resources
637 from PEAK Systemtechnik support the CAN netdevice driver model 791-----------------------
638 since Linux driver v6.0: http://www.peak-system.com/linux/index.htm
639 792
640 Please check the Mailing Lists on the berlios OSS project website. 793 You can find further resources for Socket CAN like user space tools,
794 support for old kernel versions, more drivers, mailing lists, etc.
795 at the BerliOS OSS project website for Socket CAN:
641 796
642 6.6 todo 797 http://developer.berlios.de/projects/socketcan
643 798
644 The configuration interface for CAN network drivers is still an open 799 If you have questions, bug fixes, etc., don't hesitate to post them to
645 issue that has not been finalized in the socketcan project. Also the 800 the Socketcan-Users mailing list. But please search the archives first.
646 idea of having a library module (candev.ko) that holds functions
647 that are needed by all CAN netdevices is not ready to ship.
648 Your contribution is welcome.
649 801
6507. Credits 8028. Credits
651---------- 803----------
652 804
653 Oliver Hartkopp (PF_CAN core, filters, drivers, bcm) 805 Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
654 Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan) 806 Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
655 Jan Kizka (RT-SocketCAN core, Socket-API reconciliation) 807 Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
656 Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews) 808 Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
809 CAN device driver interface, MSCAN driver)
657 Robert Schwebel (design reviews, PTXdist integration) 810 Robert Schwebel (design reviews, PTXdist integration)
658 Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers) 811 Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
659 Benedikt Spranger (reviews) 812 Benedikt Spranger (reviews)
660 Thomas Gleixner (LKML reviews, coding style, posting hints) 813 Thomas Gleixner (LKML reviews, coding style, posting hints)
661 Andrey Volkov (kernel subtree structure, ioctls, mscan driver) 814 Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
662 Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003) 815 Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
663 Klaus Hitschler (PEAK driver integration) 816 Klaus Hitschler (PEAK driver integration)
664 Uwe Koppe (CAN netdevices with PF_PACKET approach) 817 Uwe Koppe (CAN netdevices with PF_PACKET approach)
665 Michael Schulze (driver layer loopback requirement, RT CAN drivers review) 818 Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
819 Pavel Pisa (Bit-timing calculation)
820 Sascha Hauer (SJA1000 platform driver)
821 Sebastian Haas (SJA1000 EMS PCI driver)
822 Markus Plessing (SJA1000 EMS PCI driver)
823 Per Dalen (SJA1000 Kvaser PCI driver)
824 Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt
index 65df3dea5561..5552e2e575c5 100644
--- a/Documentation/networking/dm9000.txt
+++ b/Documentation/networking/dm9000.txt
@@ -129,7 +129,7 @@ PHY Link state polling
129---------------------- 129----------------------
130 130
131The driver keeps track of the link state and informs the network core 131The driver keeps track of the link state and informs the network core
132about link (carrier) availablilty. This is managed by several methods 132about link (carrier) availability. This is managed by several methods
133depending on the version of the chip and on which PHY is being used. 133depending on the version of the chip and on which PHY is being used.
134 134
135For the internal PHY, the original (and currently default) method is 135For the internal PHY, the original (and currently default) method is
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
new file mode 100644
index 000000000000..a0280ad2edc9
--- /dev/null
+++ b/Documentation/networking/ieee802154.txt
@@ -0,0 +1,76 @@
1
2 Linux IEEE 802.15.4 implementation
3
4
5Introduction
6============
7
8The Linux-ZigBee project goal is to provide complete implementation
9of IEEE 802.15.4 / ZigBee / 6LoWPAN protocols. IEEE 802.15.4 is a stack
10of protocols for organizing Low-Rate Wireless Personal Area Networks.
11
12Currently only IEEE 802.15.4 layer is implemented. We have choosen
13to use plain Berkeley socket API, the generic Linux networking stack
14to transfer IEEE 802.15.4 messages and a special protocol over genetlink
15for configuration/management
16
17
18Socket API
19==========
20
21int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
22.....
23
24The address family, socket addresses etc. are defined in the
25include/net/ieee802154/af_ieee802154.h header or in the special header
26in our userspace package (see either linux-zigbee sourceforge download page
27or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee).
28
29One can use SOCK_RAW for passing raw data towards device xmit function. YMMV.
30
31
32MLME - MAC Level Management
33============================
34
35Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands.
36See the include/net/ieee802154/nl802154.h header. Our userspace tools package
37(see above) provides CLI configuration utility for radio interfaces and simple
38coordinator for IEEE 802.15.4 networks as an example users of MLME protocol.
39
40
41Kernel side
42=============
43
44Like with WiFi, there are several types of devices implementing IEEE 802.15.4.
451) 'HardMAC'. The MAC layer is implemented in the device itself, the device
46 exports MLME and data API.
472) 'SoftMAC' or just radio. These types of devices are just radio transceivers
48 possibly with some kinds of acceleration like automatic CRC computation and
49 comparation, automagic ACK handling, address matching, etc.
50
51Those types of devices require different approach to be hooked into Linux kernel.
52
53
54HardMAC
55=======
56
57See the header include/net/ieee802154/netdevice.h. You have to implement Linux
58net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family
59code via plain sk_buffs. The control block of sk_buffs will contain additional
60info as described in the struct ieee802154_mac_cb.
61
62To hook the MLME interface you have to populate the ml_priv field of your
63net_device with a pointer to struct ieee802154_mlme_ops instance. All fields are
64required.
65
66We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c
67
68
69SoftMAC
70=======
71
72We are going to provide intermediate layer impelementing IEEE 802.15.4 MAC
73in software. This is currently WIP.
74
75See header include/net/ieee802154/mac802154.h and several drivers in
76drivers/ieee802154/
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index b121c5db707f..8be76235fe67 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -168,7 +168,16 @@ tcp_dsack - BOOLEAN
168 Allows TCP to send "duplicate" SACKs. 168 Allows TCP to send "duplicate" SACKs.
169 169
170tcp_ecn - BOOLEAN 170tcp_ecn - BOOLEAN
171 Enable Explicit Congestion Notification in TCP. 171 Enable Explicit Congestion Notification (ECN) in TCP. ECN is only
172 used when both ends of the TCP flow support it. It is useful to
173 avoid losses due to congestion (when the bottleneck router supports
174 ECN).
175 Possible values are:
176 0 disable ECN
177 1 ECN enabled
178 2 Only server-side ECN enabled. If the other end does
179 not support ECN, behavior is like with ECN disabled.
180 Default: 2
172 181
173tcp_fack - BOOLEAN 182tcp_fack - BOOLEAN
174 Enable FACK congestion avoidance and fast retransmission. 183 Enable FACK congestion avoidance and fast retransmission.
@@ -1048,6 +1057,13 @@ disable_ipv6 - BOOLEAN
1048 address. 1057 address.
1049 Default: FALSE (enable IPv6 operation) 1058 Default: FALSE (enable IPv6 operation)
1050 1059
1060 When this value is changed from 1 to 0 (IPv6 is being enabled),
1061 it will dynamically create a link-local address on the given
1062 interface and start Duplicate Address Detection, if necessary.
1063
1064 When this value is changed from 0 to 1 (IPv6 is being disabled),
1065 it will dynamically delete all address on the given interface.
1066
1051accept_dad - INTEGER 1067accept_dad - INTEGER
1052 Whether to accept DAD (Duplicate Address Detection). 1068 Whether to accept DAD (Duplicate Address Detection).
1053 0: Disable DAD 1069 0: Disable DAD
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
index 268e5c103dd8..9fd7e21296c8 100644
--- a/Documentation/networking/ipv6.txt
+++ b/Documentation/networking/ipv6.txt
@@ -33,3 +33,40 @@ disable
33 33
34 A reboot is required to enable IPv6. 34 A reboot is required to enable IPv6.
35 35
36autoconf
37
38 Specifies whether to enable IPv6 address autoconfiguration
39 on all interfaces. This might be used when one does not wish
40 for addresses to be automatically generated from prefixes
41 received in Router Advertisements.
42
43 The possible values and their effects are:
44
45 0
46 IPv6 address autoconfiguration is disabled on all interfaces.
47
48 Only the IPv6 loopback address (::1) and link-local addresses
49 will be added to interfaces.
50
51 1
52 IPv6 address autoconfiguration is enabled on all interfaces.
53
54 This is the default value.
55
56disable_ipv6
57
58 Specifies whether to disable IPv6 on all interfaces.
59 This might be used when no IPv6 addresses are desired.
60
61 The possible values and their effects are:
62
63 0
64 IPv6 is enabled on all interfaces.
65
66 This is the default value.
67
68 1
69 IPv6 is disabled on all interfaces.
70
71 No IPv6 addresses will be added to interfaces.
72
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index 2451f551c505..63214b280e00 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -158,7 +158,7 @@ Sample Userspace Code
158 } 158 }
159 return 0; 159 return 0;
160 160
161Miscellanous 161Miscellaneous
162============ 162============
163 163
164The PPPoL2TP driver was developed as part of the OpenL2TP project by 164The PPPoL2TP driver was developed as part of the OpenL2TP project by
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt
index 84906ef3ed6e..b30e81ad5307 100644
--- a/Documentation/networking/mac80211-injection.txt
+++ b/Documentation/networking/mac80211-injection.txt
@@ -12,38 +12,22 @@ following format:
12The radiotap format is discussed in 12The radiotap format is discussed in
13./Documentation/networking/radiotap-headers.txt. 13./Documentation/networking/radiotap-headers.txt.
14 14
15Despite 13 radiotap argument types are currently defined, most only make sense 15Despite many radiotap parameters being currently defined, most only make sense
16to appear on received packets. The following information is parsed from the 16to appear on received packets. The following information is parsed from the
17radiotap headers and used to control injection: 17radiotap headers and used to control injection:
18 18
19 * IEEE80211_RADIOTAP_RATE
20
21 rate in 500kbps units, automatic if invalid or not present
22
23
24 * IEEE80211_RADIOTAP_ANTENNA
25
26 antenna to use, automatic if not present
27
28
29 * IEEE80211_RADIOTAP_DBM_TX_POWER
30
31 transmit power in dBm, automatic if not present
32
33
34 * IEEE80211_RADIOTAP_FLAGS 19 * IEEE80211_RADIOTAP_FLAGS
35 20
36 IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated 21 IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
37 IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available 22 IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
38 IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the 23 IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
39 current fragmentation threshold. Note that 24 current fragmentation threshold.
40 this flag is only reliable when software 25
41 fragmentation is enabled)
42 26
43The injection code can also skip all other currently defined radiotap fields 27The injection code can also skip all other currently defined radiotap fields
44facilitating replay of captured radiotap headers directly. 28facilitating replay of captured radiotap headers directly.
45 29
46Here is an example valid radiotap header defining these three parameters 30Here is an example valid radiotap header defining some parameters
47 31
48 0x00, 0x00, // <-- radiotap version 32 0x00, 0x00, // <-- radiotap version
49 0x0b, 0x00, // <- radiotap header length 33 0x0b, 0x00, // <- radiotap header length
@@ -72,8 +56,8 @@ interface), along the following lines:
72... 56...
73 r = pcap_inject(ppcap, u8aSendBuffer, nLength); 57 r = pcap_inject(ppcap, u8aSendBuffer, nLength);
74 58
75You can also find sources for a complete inject test applet here: 59You can also find a link to a complete inject application here:
76 60
77http://penumbra.warmcat.com/_twk/tiki-index.php?page=packetspammer 61http://wireless.kernel.org/en/users/Documentation/packetspammer
78 62
79Andy Green <andy@warmcat.com> 63Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index a2ab6a0b116d..87b3d15f523a 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -74,7 +74,7 @@ dev->hard_start_xmit:
74 for this and return NETDEV_TX_LOCKED when the spin lock fails. 74 for this and return NETDEV_TX_LOCKED when the spin lock fails.
75 The locking there should also properly protect against 75 The locking there should also properly protect against
76 set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated. 76 set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated.
77 Dont use it for new drivers. 77 Don't use it for new drivers.
78 78
79 Context: Process with BHs disabled or BH (timer), 79 Context: Process with BHs disabled or BH (timer),
80 will be called with interrupts disabled by netconsole. 80 will be called with interrupts disabled by netconsole.
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
index c9074f9b78bb..1a77a3cfae54 100644
--- a/Documentation/networking/operstates.txt
+++ b/Documentation/networking/operstates.txt
@@ -38,9 +38,6 @@ ifinfomsg::if_flags & IFF_LOWER_UP:
38ifinfomsg::if_flags & IFF_DORMANT: 38ifinfomsg::if_flags & IFF_DORMANT:
39 Driver has signaled netif_dormant_on() 39 Driver has signaled netif_dormant_on()
40 40
41These interface flags can also be queried without netlink using the
42SIOCGIFFLAGS ioctl.
43
44TLV IFLA_OPERSTATE 41TLV IFLA_OPERSTATE
45 42
46contains RFC2863 state of the interface in numeric representation: 43contains RFC2863 state of the interface in numeric representation:
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 07c53d596035..a22fd85e3796 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -4,16 +4,18 @@
4 4
5This file documents the CONFIG_PACKET_MMAP option available with the PACKET 5This file documents the CONFIG_PACKET_MMAP option available with the PACKET
6socket interface on 2.4 and 2.6 kernels. This type of sockets is used for 6socket interface on 2.4 and 2.6 kernels. This type of sockets is used for
7capture network traffic with utilities like tcpdump or any other that uses 7capture network traffic with utilities like tcpdump or any other that needs
8the libpcap library. 8raw access to network interface.
9
10You can find the latest version of this document at
11 9
10You can find the latest version of this document at:
12 http://pusa.uv.es/~ulisses/packet_mmap/ 11 http://pusa.uv.es/~ulisses/packet_mmap/
13 12
14Please send me your comments to 13Howto can be found at:
14 http://wiki.gnu-log.net (packet_mmap)
15 15
16Please send your comments to
16 Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es> 17 Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
18 Johann Baudy <johann.baudy@gnu-log.net>
17 19
18------------------------------------------------------------------------------- 20-------------------------------------------------------------------------------
19+ Why use PACKET_MMAP 21+ Why use PACKET_MMAP
@@ -25,19 +27,24 @@ to capture each packet, it requires two if you want to get packet's
25timestamp (like libpcap always does). 27timestamp (like libpcap always does).
26 28
27In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size 29In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size
28configurable circular buffer mapped in user space. This way reading packets just 30configurable circular buffer mapped in user space that can be used to either
29needs to wait for them, most of the time there is no need to issue a single 31send or receive packets. This way reading packets just needs to wait for them,
30system call. By using a shared buffer between the kernel and the user 32most of the time there is no need to issue a single system call. Concerning
31also has the benefit of minimizing packet copies. 33transmission, multiple packets can be sent through one system call to get the
32 34highest bandwidth.
33It's fine to use PACKET_MMAP to improve the performance of the capture process, 35By using a shared buffer between the kernel and the user also has the benefit
34but it isn't everything. At least, if you are capturing at high speeds (this 36of minimizing packet copies.
35is relative to the cpu speed), you should check if the device driver of your 37
36network interface card supports some sort of interrupt load mitigation or 38It's fine to use PACKET_MMAP to improve the performance of the capture and
37(even better) if it supports NAPI, also make sure it is enabled. 39transmission process, but it isn't everything. At least, if you are capturing
40at high speeds (this is relative to the cpu speed), you should check if the
41device driver of your network interface card supports some sort of interrupt
42load mitigation or (even better) if it supports NAPI, also make sure it is
43enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
44supported by devices of your network.
38 45
39-------------------------------------------------------------------------------- 46--------------------------------------------------------------------------------
40+ How to use CONFIG_PACKET_MMAP 47+ How to use CONFIG_PACKET_MMAP to improve capture process
41-------------------------------------------------------------------------------- 48--------------------------------------------------------------------------------
42 49
43From the user standpoint, you should use the higher level libpcap library, which 50From the user standpoint, you should use the higher level libpcap library, which
@@ -57,7 +64,7 @@ the low level details or want to improve libpcap by including PACKET_MMAP
57support. 64support.
58 65
59-------------------------------------------------------------------------------- 66--------------------------------------------------------------------------------
60+ How to use CONFIG_PACKET_MMAP directly 67+ How to use CONFIG_PACKET_MMAP directly to improve capture process
61-------------------------------------------------------------------------------- 68--------------------------------------------------------------------------------
62 69
63From the system calls stand point, the use of PACKET_MMAP involves 70From the system calls stand point, the use of PACKET_MMAP involves
@@ -66,6 +73,7 @@ the following process:
66 73
67[setup] socket() -------> creation of the capture socket 74[setup] socket() -------> creation of the capture socket
68 setsockopt() ---> allocation of the circular buffer (ring) 75 setsockopt() ---> allocation of the circular buffer (ring)
76 option: PACKET_RX_RING
69 mmap() ---------> mapping of the allocated buffer to the 77 mmap() ---------> mapping of the allocated buffer to the
70 user process 78 user process
71 79
@@ -97,13 +105,75 @@ also the mapping of the circular buffer in the user process and
97the use of this buffer. 105the use of this buffer.
98 106
99-------------------------------------------------------------------------------- 107--------------------------------------------------------------------------------
108+ How to use CONFIG_PACKET_MMAP directly to improve transmission process
109--------------------------------------------------------------------------------
110Transmission process is similar to capture as shown below.
111
112[setup] socket() -------> creation of the transmission socket
113 setsockopt() ---> allocation of the circular buffer (ring)
114 option: PACKET_TX_RING
115 bind() ---------> bind transmission socket with a network interface
116 mmap() ---------> mapping of the allocated buffer to the
117 user process
118
119[transmission] poll() ---------> wait for free packets (optional)
120 send() ---------> send all packets that are set as ready in
121 the ring
122 The flag MSG_DONTWAIT can be used to return
123 before end of transfer.
124
125[shutdown] close() --------> destruction of the transmission socket and
126 deallocation of all associated resources.
127
128Binding the socket to your network interface is mandatory (with zero copy) to
129know the header size of frames used in the circular buffer.
130
131As capture, each frame contains two parts:
132
133 --------------------
134| struct tpacket_hdr | Header. It contains the status of
135| | of this frame
136|--------------------|
137| data buffer |
138. . Data that will be sent over the network interface.
139. .
140 --------------------
141
142 bind() associates the socket to your network interface thanks to
143 sll_ifindex parameter of struct sockaddr_ll.
144
145 Initialization example:
146
147 struct sockaddr_ll my_addr;
148 struct ifreq s_ifr;
149 ...
150
151 strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
152
153 /* get interface index of eth0 */
154 ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
155
156 /* fill sockaddr_ll struct to prepare binding */
157 my_addr.sll_family = AF_PACKET;
158 my_addr.sll_protocol = ETH_P_ALL;
159 my_addr.sll_ifindex = s_ifr.ifr_ifindex;
160
161 /* bind socket to eth0 */
162 bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
163
164 A complete tutorial is available at: http://wiki.gnu-log.net/
165
166--------------------------------------------------------------------------------
100+ PACKET_MMAP settings 167+ PACKET_MMAP settings
101-------------------------------------------------------------------------------- 168--------------------------------------------------------------------------------
102 169
103 170
104To setup PACKET_MMAP from user level code is done with a call like 171To setup PACKET_MMAP from user level code is done with a call like
105 172
173 - Capture process
106 setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req)) 174 setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
175 - Transmission process
176 setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
107 177
108The most significant argument in the previous call is the req parameter, 178The most significant argument in the previous call is the req parameter,
109this parameter must to have the following structure: 179this parameter must to have the following structure:
@@ -117,11 +187,11 @@ this parameter must to have the following structure:
117 }; 187 };
118 188
119This structure is defined in /usr/include/linux/if_packet.h and establishes a 189This structure is defined in /usr/include/linux/if_packet.h and establishes a
120circular buffer (ring) of unswappable memory mapped in the capture process. 190circular buffer (ring) of unswappable memory.
121Being mapped in the capture process allows reading the captured frames and 191Being mapped in the capture process allows reading the captured frames and
122related meta-information like timestamps without requiring a system call. 192related meta-information like timestamps without requiring a system call.
123 193
124Captured frames are grouped in blocks. Each block is a physically contiguous 194Frames are grouped in blocks. Each block is a physically contiguous
125region of memory and holds tp_block_size/tp_frame_size frames. The total number 195region of memory and holds tp_block_size/tp_frame_size frames. The total number
126of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because 196of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because
127 197
@@ -336,6 +406,7 @@ struct tpacket_hdr). If this field is 0 means that the frame is ready
336to be used for the kernel, If not, there is a frame the user can read 406to be used for the kernel, If not, there is a frame the user can read
337and the following flags apply: 407and the following flags apply:
338 408
409+++ Capture process:
339 from include/linux/if_packet.h 410 from include/linux/if_packet.h
340 411
341 #define TP_STATUS_COPY 2 412 #define TP_STATUS_COPY 2
@@ -391,6 +462,37 @@ packets are in the ring:
391It doesn't incur in a race condition to first check the status value and 462It doesn't incur in a race condition to first check the status value and
392then poll for frames. 463then poll for frames.
393 464
465
466++ Transmission process
467Those defines are also used for transmission:
468
469 #define TP_STATUS_AVAILABLE 0 // Frame is available
470 #define TP_STATUS_SEND_REQUEST 1 // Frame will be sent on next send()
471 #define TP_STATUS_SENDING 2 // Frame is currently in transmission
472 #define TP_STATUS_WRONG_FORMAT 4 // Frame format is not correct
473
474First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
475packet, the user fills a data buffer of an available frame, sets tp_len to
476current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
477This can be done on multiple frames. Once the user is ready to transmit, it
478calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
479forwarded to the network device. The kernel updates each status of sent
480frames with TP_STATUS_SENDING until the end of transfer.
481At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
482
483 header->tp_len = in_i_size;
484 header->tp_status = TP_STATUS_SEND_REQUEST;
485 retval = send(this->socket, NULL, 0, 0);
486
487The user can also use poll() to check if a buffer is available:
488(status == TP_STATUS_SENDING)
489
490 struct pollfd pfd;
491 pfd.fd = fd;
492 pfd.revents = 0;
493 pfd.events = POLLOUT;
494 retval = poll(&pfd, 1, timeout);
495
394-------------------------------------------------------------------------------- 496--------------------------------------------------------------------------------
395+ THANKS 497+ THANKS
396-------------------------------------------------------------------------------- 498--------------------------------------------------------------------------------
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt
index 6a07e45d4a93..6e8ce09f9c73 100644
--- a/Documentation/networking/phonet.txt
+++ b/Documentation/networking/phonet.txt
@@ -36,7 +36,7 @@ Phonet packets have a common header as follows:
36On Linux, the link-layer header includes the pn_media byte (see below). 36On Linux, the link-layer header includes the pn_media byte (see below).
37The next 7 bytes are part of the network-layer header. 37The next 7 bytes are part of the network-layer header.
38 38
39The device ID is split: the 6 higher-order bits consitute the device 39The device ID is split: the 6 higher-order bits constitute the device
40address, while the 2 lower-order bits are used for multiplexing, as are 40address, while the 2 lower-order bits are used for multiplexing, as are
41the 8-bit object identifiers. As such, Phonet can be considered as a 41the 8-bit object identifiers. As such, Phonet can be considered as a
42network layer with 6 bits of address space and 10 bits for transport 42network layer with 6 bits of address space and 10 bits for transport
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index dcf31648414a..eaa1a25946c1 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -89,7 +89,7 @@ added to this document when its support is enabled.
89Device drivers who provide their own built regulatory domain 89Device drivers who provide their own built regulatory domain
90do not need a callback as the channels registered by them are 90do not need a callback as the channels registered by them are
91the only ones that will be allowed and therefore *additional* 91the only ones that will be allowed and therefore *additional*
92cannels cannot be enabled. 92channels cannot be enabled.
93 93
94Example code - drivers hinting an alpha2: 94Example code - drivers hinting an alpha2:
95------------------------------------------ 95------------------------------------------
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 421e7d00ffd0..c9abbd86bc18 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices:
75struct bus_type { 75struct bus_type {
76 ... 76 ...
77 int (*suspend)(struct device *dev, pm_message_t state); 77 int (*suspend)(struct device *dev, pm_message_t state);
78 int (*suspend_late)(struct device *dev, pm_message_t state);
79
80 int (*resume_early)(struct device *dev);
81 int (*resume)(struct device *dev); 78 int (*resume)(struct device *dev);
82}; 79};
83 80
@@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order:
226 223
227 This call should handle parts of device suspend logic that require 224 This call should handle parts of device suspend logic that require
228 sleeping. It probably does work to quiesce the device which hasn't 225 sleeping. It probably does work to quiesce the device which hasn't
229 been abstracted into class.suspend() or bus.suspend_late(). 226 been abstracted into class.suspend().
230
231 3 bus.suspend_late(dev, message) is called with IRQs disabled, and
232 with only one CPU active. Until the bus.resume_early() phase
233 completes (see later), IRQs are not enabled again. This method
234 won't be exposed by all busses; for message based busses like USB,
235 I2C, or SPI, device interactions normally require IRQs. This bus
236 call may be morphed into a driver call with bus-specific parameters.
237
238 This call might save low level hardware state that might otherwise
239 be lost in the upcoming low power state, and actually put the
240 device into a low power state ... so that in some cases the device
241 may stay partly usable until this late. This "late" call may also
242 help when coping with hardware that behaves badly.
243 227
244The pm_message_t parameter is currently used to refine those semantics 228The pm_message_t parameter is currently used to refine those semantics
245(described later). 229(described later).
@@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins.
351 335
352The phases are seen by driver notifications issued in this order: 336The phases are seen by driver notifications issued in this order:
353 337
354 1 bus.resume_early(dev) is called with IRQs disabled, and with 338 1 bus.resume(dev) reverses the effects of bus.suspend(). This may
355 only one CPU active. As with bus.suspend_late(), this method 339 be morphed into a device driver call with bus-specific parameters;
356 won't be supported on busses that require IRQs in order to 340 implementations may sleep.
357 interact with devices.
358
359 This reverses the effects of bus.suspend_late().
360
361 2 bus.resume(dev) is called next. This may be morphed into a device
362 driver call with bus-specific parameters; implementations may sleep.
363
364 This reverses the effects of bus.suspend().
365 341
366 3 class.resume(dev) is called for devices associated with a class 342 2 class.resume(dev) is called for devices associated with a class
367 that has such a method. Implementations may sleep. 343 that has such a method. Implementations may sleep.
368 344
369 This reverses the effects of class.suspend(), and would usually 345 This reverses the effects of class.suspend(), and would usually
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
index 82b7a43aadba..5f83fd24ea84 100644
--- a/Documentation/power/regulator/consumer.txt
+++ b/Documentation/power/regulator/consumer.txt
@@ -178,5 +178,5 @@ Consumers can uregister interest by calling :-
178int regulator_unregister_notifier(struct regulator *regulator, 178int regulator_unregister_notifier(struct regulator *regulator,
179 struct notifier_block *nb); 179 struct notifier_block *nb);
180 180
181Regulators use the kernel notifier framework to send event to thier interested 181Regulators use the kernel notifier framework to send event to their interested
182consumers. 182consumers.
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
index bdcb332bd7fb..0cded696ca01 100644
--- a/Documentation/power/regulator/overview.txt
+++ b/Documentation/power/regulator/overview.txt
@@ -119,7 +119,7 @@ Some terms used in this document:-
119 battery power, USB power) 119 battery power, USB power)
120 120
121 Regulator Domains: is the new current limit within the 121 Regulator Domains: is the new current limit within the
122 regulator operating parameters for input/ouput voltage. 122 regulator operating parameters for input/output voltage.
123 123
124 If the regulator request passes all the constraint tests 124 If the regulator request passes all the constraint tests
125 then the new regulator value is applied. 125 then the new regulator value is applied.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
index 2ebdc6091ce1..514b94fc931e 100644
--- a/Documentation/power/s2ram.txt
+++ b/Documentation/power/s2ram.txt
@@ -63,7 +63,7 @@ hardware during resume operations where a value can be set that will
63survive a reboot. 63survive a reboot.
64 64
65Consequence is that after a resume (even if it is successful) your system 65Consequence is that after a resume (even if it is successful) your system
66clock will have a value corresponding to the magic mumber instead of the 66clock will have a value corresponding to the magic number instead of the
67correct date/time! It is therefore advisable to use a program like ntp-date 67correct date/time! It is therefore advisable to use a program like ntp-date
68or rdate to reset the correct date/time from an external time source when 68or rdate to reset the correct date/time from an external time source when
69using this trace option. 69using this trace option.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 7b99636564c8..b967cd9137d6 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -109,7 +109,7 @@ unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
109still frozen when the device is being closed). 109still frozen when the device is being closed).
110 110
111Currently it is assumed that the userland utilities reading/writing the 111Currently it is assumed that the userland utilities reading/writing the
112snapshot image from/to the kernel will use a swap parition, called the resume 112snapshot image from/to the kernel will use a swap partition, called the resume
113partition, or a swap file as storage space (if a swap file is used, the resume 113partition, or a swap file as storage space (if a swap file is used, the resume
114partition is the partition that holds this file). However, this is not really 114partition is the partition that holds this file). However, this is not really
115required, as they can use, for example, a special (blank) suspend partition or 115required, as they can use, for example, a special (blank) suspend partition or
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index d16b7a1c3793..8d999d862d0e 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1356,7 +1356,7 @@ platforms are moved over to use the flattened-device-tree model.
1356 - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY 1356 - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
1357 for, used if phy-address is absent. bit 0x00000001 is 1357 for, used if phy-address is absent. bit 0x00000001 is
1358 MDIO address 0. 1358 MDIO address 0.
1359 For Axon it can be absent, thouugh my current driver 1359 For Axon it can be absent, though my current driver
1360 doesn't handle phy-address yet so for now, keep 1360 doesn't handle phy-address yet so for now, keep
1361 0x00ffffff in it. 1361 0x00ffffff in it.
1362 - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec 1362 - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
@@ -1438,7 +1438,7 @@ platforms are moved over to use the flattened-device-tree model.
1438 1438
1439 The Xilinx EDK toolchain ships with a set of IP cores (devices) for use 1439 The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
1440 in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range 1440 in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range
1441 of standard device types (network, serial, etc.) and miscellanious 1441 of standard device types (network, serial, etc.) and miscellaneous
1442 devices (gpio, LCD, spi, etc). Also, since these devices are 1442 devices (gpio, LCD, spi, etc). Also, since these devices are
1443 implemented within the fpga fabric every instance of the device can be 1443 implemented within the fpga fabric every instance of the device can be
1444 synthesised with different options that change the behaviour. 1444 synthesised with different options that change the behaviour.
diff --git a/Documentation/powerpc/dts-bindings/can/sja1000.txt b/Documentation/powerpc/dts-bindings/can/sja1000.txt
new file mode 100644
index 000000000000..d6d209ded937
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/can/sja1000.txt
@@ -0,0 +1,53 @@
1Memory mapped SJA1000 CAN controller from NXP (formerly Philips)
2
3Required properties:
4
5- compatible : should be "nxp,sja1000".
6
7- reg : should specify the chip select, address offset and size required
8 to map the registers of the SJA1000. The size is usually 0x80.
9
10- interrupts: property with a value describing the interrupt source
11 (number and sensitivity) required for the SJA1000.
12
13Optional properties:
14
15- nxp,external-clock-frequency : Frequency of the external oscillator
16 clock in Hz. Note that the internal clock frequency used by the
17 SJA1000 is half of that value. If not specified, a default value
18 of 16000000 (16 MHz) is used.
19
20- nxp,tx-output-mode : operation mode of the TX output control logic:
21 <0x0> : bi-phase output mode
22 <0x1> : normal output mode (default)
23 <0x2> : test output mode
24 <0x3> : clock output mode
25
26- nxp,tx-output-config : TX output pin configuration:
27 <0x01> : TX0 invert
28 <0x02> : TX0 pull-down (default)
29 <0x04> : TX0 pull-up
30 <0x06> : TX0 push-pull
31 <0x08> : TX1 invert
32 <0x10> : TX1 pull-down
33 <0x20> : TX1 pull-up
34 <0x30> : TX1 push-pull
35
36- nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin.
37 If not specified or if the specified value is 0, the CLKOUT pin
38 will be disabled.
39
40- nxp,no-comparator-bypass : Allows to disable the CAN input comperator.
41
42For futher information, please have a look to the SJA1000 data sheet.
43
44Examples:
45
46can@3,100 {
47 compatible = "nxp,sja1000";
48 reg = <3 0x100 0x80>;
49 interrupts = <2 0>;
50 interrupt-parent = <&mpic>;
51 nxp,external-clock-frequency = <16000000>;
52};
53
diff --git a/Documentation/powerpc/dts-bindings/ecm.txt b/Documentation/powerpc/dts-bindings/ecm.txt
new file mode 100644
index 000000000000..f514f29c67d6
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/ecm.txt
@@ -0,0 +1,64 @@
1=====================================================================
2E500 LAW & Coherency Module Device Tree Binding
3Copyright (C) 2009 Freescale Semiconductor Inc.
4=====================================================================
5
6Local Access Window (LAW) Node
7
8The LAW node represents the region of CCSR space where local access
9windows are configured. For ECM based devices this is the first 4k
10of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
11number of local access windows as specified by fsl,num-laws.
12
13PROPERTIES
14
15 - compatible
16 Usage: required
17 Value type: <string>
18 Definition: Must include "fsl,ecm-law"
19
20 - reg
21 Usage: required
22 Value type: <prop-encoded-array>
23 Definition: A standard property. The value specifies the
24 physical address offset and length of the CCSR space
25 registers.
26
27 - fsl,num-laws
28 Usage: required
29 Value type: <u32>
30 Definition: The value specifies the number of local access
31 windows for this device.
32
33=====================================================================
34
35E500 Coherency Module Node
36
37The E500 LAW node represents the region of CCSR space where ECM config
38and error reporting registers exist, this is the second 4k (0x1000)
39of CCSR space.
40
41PROPERTIES
42
43 - compatible
44 Usage: required
45 Value type: <string>
46 Definition: Must include "fsl,CHIP-ecm", "fsl,ecm" where
47 CHIP is the processor (mpc8572, mpc8544, etc.)
48
49 - reg
50 Usage: required
51 Value type: <prop-encoded-array>
52 Definition: A standard property. The value specifies the
53 physical address offset and length of the CCSR space
54 registers.
55
56 - interrupts
57 Usage: required
58 Value type: <prop-encoded-array>
59
60 - interrupt-parent
61 Usage: required
62 Value type: <phandle>
63
64=====================================================================
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt
index 6c974d28eeb4..e8b5bc24d0ac 100644
--- a/Documentation/powerpc/dts-bindings/fsl/board.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/board.txt
@@ -38,7 +38,7 @@ Required properities:
38- reg : Should contain the address and the length of the GPIO bank 38- reg : Should contain the address and the length of the GPIO bank
39 register. 39 register.
40- #gpio-cells : Should be two. The first cell is the pin number and the 40- #gpio-cells : Should be two. The first cell is the pin number and the
41 second cell is used to specify optional paramters (currently unused). 41 second cell is used to specify optional parameters (currently unused).
42- gpio-controller : Marks the port as GPIO controller. 42- gpio-controller : Marks the port as GPIO controller.
43 43
44Example: 44Example:
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
index 088fc471e03a..160c752484b4 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/cpm.txt
@@ -19,7 +19,7 @@ Example:
19 reg = <119c0 30>; 19 reg = <119c0 30>;
20 } 20 }
21 21
22* Properties common to mulitple CPM/QE devices 22* Properties common to multiple CPM/QE devices
23 23
24- fsl,cpm-command : This value is ORed with the opcode and command flag 24- fsl,cpm-command : This value is ORed with the opcode and command flag
25 to specify the device on which a CPM command operates. 25 to specify the device on which a CPM command operates.
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
index 1815dfede1bc..349f79fd7076 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/gpio.txt
@@ -11,7 +11,7 @@ Required properties:
11 "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", 11 "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d",
12 "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" 12 "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank"
13- #gpio-cells : Should be two. The first cell is the pin number and the 13- #gpio-cells : Should be two. The first cell is the pin number and the
14 second cell is used to specify optional paramters (currently unused). 14 second cell is used to specify optional parameters (currently unused).
15- gpio-controller : Marks the port as GPIO controller. 15- gpio-controller : Marks the port as GPIO controller.
16 16
17Example of three SOC GPIO banks defined as gpio-controller nodes: 17Example of three SOC GPIO banks defined as gpio-controller nodes:
diff --git a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
index 78790d58dc2c..6e37be1eeb2d 100644
--- a/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/cpm_qe/qe.txt
@@ -17,6 +17,9 @@ Required properties:
17- model : precise model of the QE, Can be "QE", "CPM", or "CPM2" 17- model : precise model of the QE, Can be "QE", "CPM", or "CPM2"
18- reg : offset and length of the device registers. 18- reg : offset and length of the device registers.
19- bus-frequency : the clock frequency for QUICC Engine. 19- bus-frequency : the clock frequency for QUICC Engine.
20- fsl,qe-num-riscs: define how many RISC engines the QE has.
21- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
22 threads.
20 23
21Recommended properties 24Recommended properties
22- brg-frequency : the internal clock source frequency for baud-rate 25- brg-frequency : the internal clock source frequency for baud-rate
diff --git a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
index 600846557763..5093ddf900da 100644
--- a/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/esdhc.txt
@@ -5,8 +5,7 @@ for MMC, SD, and SDIO types of memory cards.
5 5
6Required properties: 6Required properties:
7 - compatible : should be 7 - compatible : should be
8 "fsl,<chip>-esdhc", "fsl,mpc8379-esdhc" for MPC83xx processors. 8 "fsl,<chip>-esdhc", "fsl,esdhc"
9 "fsl,<chip>-esdhc", "fsl,mpc8536-esdhc" for MPC85xx processors.
10 - reg : should contain eSDHC registers location and length. 9 - reg : should contain eSDHC registers location and length.
11 - interrupts : should contain eSDHC interrupt. 10 - interrupts : should contain eSDHC interrupt.
12 - interrupt-parent : interrupt source phandle. 11 - interrupt-parent : interrupt source phandle.
@@ -15,7 +14,7 @@ Required properties:
15Example: 14Example:
16 15
17sdhci@2e000 { 16sdhci@2e000 {
18 compatible = "fsl,mpc8378-esdhc", "fsl,mpc8379-esdhc"; 17 compatible = "fsl,mpc8378-esdhc", "fsl,esdhc";
19 reg = <0x2e000 0x1000>; 18 reg = <0x2e000 0x1000>;
20 interrupts = <42 0x8>; 19 interrupts = <42 0x8>;
21 interrupt-parent = <&ipic>; 20 interrupt-parent = <&ipic>;
diff --git a/Documentation/powerpc/dts-bindings/fsl/mcm.txt b/Documentation/powerpc/dts-bindings/fsl/mcm.txt
new file mode 100644
index 000000000000..4ceda9b3b413
--- /dev/null
+++ b/Documentation/powerpc/dts-bindings/fsl/mcm.txt
@@ -0,0 +1,64 @@
1=====================================================================
2MPX LAW & Coherency Module Device Tree Binding
3Copyright (C) 2009 Freescale Semiconductor Inc.
4=====================================================================
5
6Local Access Window (LAW) Node
7
8The LAW node represents the region of CCSR space where local access
9windows are configured. For MCM based devices this is the first 4k
10of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
11number of local access windows as specified by fsl,num-laws.
12
13PROPERTIES
14
15 - compatible
16 Usage: required
17 Value type: <string>
18 Definition: Must include "fsl,mcm-law"
19
20 - reg
21 Usage: required
22 Value type: <prop-encoded-array>
23 Definition: A standard property. The value specifies the
24 physical address offset and length of the CCSR space
25 registers.
26
27 - fsl,num-laws
28 Usage: required
29 Value type: <u32>
30 Definition: The value specifies the number of local access
31 windows for this device.
32
33=====================================================================
34
35MPX Coherency Module Node
36
37The MPX LAW node represents the region of CCSR space where MCM config
38and error reporting registers exist, this is the second 4k (0x1000)
39of CCSR space.
40
41PROPERTIES
42
43 - compatible
44 Usage: required
45 Value type: <string>
46 Definition: Must include "fsl,CHIP-mcm", "fsl,mcm" where
47 CHIP is the processor (mpc8641, mpc8610, etc.)
48
49 - reg
50 Usage: required
51 Value type: <prop-encoded-array>
52 Definition: A standard property. The value specifies the
53 physical address offset and length of the CCSR space
54 registers.
55
56 - interrupts
57 Usage: required
58 Value type: <prop-encoded-array>
59
60 - interrupt-parent
61 Usage: required
62 Value type: <phandle>
63
64=====================================================================
diff --git a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
index b26b91992c55..bcc30bac6831 100644
--- a/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/msi-pic.txt
@@ -1,6 +1,6 @@
1* Freescale MSI interrupt controller 1* Freescale MSI interrupt controller
2 2
3Reguired properities: 3Required properties:
4- compatible : compatible list, contains 2 entries, 4- compatible : compatible list, contains 2 entries,
5 first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572, 5 first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572,
6 etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on 6 etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on
diff --git a/Documentation/powerpc/dts-bindings/fsl/pmc.txt b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
index 02f6f43ee1b7..07256b7ffcaa 100644
--- a/Documentation/powerpc/dts-bindings/fsl/pmc.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/pmc.txt
@@ -15,8 +15,8 @@ Properties:
15 compatible; all statements below that apply to "fsl,mpc8548-pmc" also 15 compatible; all statements below that apply to "fsl,mpc8548-pmc" also
16 apply to "fsl,mpc8641d-pmc". 16 apply to "fsl,mpc8641d-pmc".
17 17
18 Compatibility does not include bit assigments in SCCR/PMCDR/DEVDISR; these 18 Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these
19 bit assigments are indicated via the sleep specifier in each device's 19 bit assignments are indicated via the sleep specifier in each device's
20 sleep property. 20 sleep property.
21 21
22- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource 22- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
index 06da4d4b44f9..2031ddb33d09 100644
--- a/Documentation/powerpc/qe_firmware.txt
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -225,7 +225,7 @@ For example, to match the 8323, revision 1.0:
225 soc.major = 1 225 soc.major = 1
226 soc.minor = 0 226 soc.minor = 0
227 227
228'padding' is neccessary for structure alignment. This field ensures that the 228'padding' is necessary for structure alignment. This field ensures that the
229'extended_modes' field is aligned on a 64-bit boundary. 229'extended_modes' field is aligned on a 64-bit boundary.
230 230
231'extended_modes' is a bitfield that defines special functionality which has an 231'extended_modes' is a bitfield that defines special functionality which has an
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 7224459b469e..aae8355d3166 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -131,8 +131,8 @@ Example:
131 } 131 }
132 132
133 /* Add new node and rebalance tree. */ 133 /* Add new node and rebalance tree. */
134 rb_link_node(data->node, parent, new); 134 rb_link_node(&data->node, parent, new);
135 rb_insert_color(data->node, root); 135 rb_insert_color(&data->node, root);
136 136
137 return TRUE; 137 return TRUE;
138 } 138 }
@@ -146,10 +146,10 @@ To remove an existing node from a tree, call:
146 146
147Example: 147Example:
148 148
149 struct mytype *data = mysearch(mytree, "walrus"); 149 struct mytype *data = mysearch(&mytree, "walrus");
150 150
151 if (data) { 151 if (data) {
152 rb_erase(data->node, mytree); 152 rb_erase(&data->node, &mytree);
153 myfree(data); 153 myfree(data);
154 } 154 }
155 155
@@ -188,5 +188,5 @@ Example:
188 188
189 struct rb_node *node; 189 struct rb_node *node;
190 for (node = rb_first(&mytree); node; node = rb_next(node)) 190 for (node = rb_first(&mytree); node; node = rb_next(node))
191 printk("key=%s\n", rb_entry(node, int, keystring)); 191 printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
192 192
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
index 4d3ee317a4a3..1b74b5f30af4 100644
--- a/Documentation/rfkill.txt
+++ b/Documentation/rfkill.txt
@@ -1,575 +1,136 @@
1rfkill - RF switch subsystem support 1rfkill - RF kill switch support
2==================================== 2===============================
3 3
41 Introduction 41. Introduction
52 Implementation details 52. Implementation details
63 Kernel driver guidelines 63. Kernel driver guidelines
73.1 wireless device drivers 74. Kernel API
83.2 platform/switch drivers 85. Userspace support
93.3 input device drivers
104 Kernel API
115 Userspace support
12 9
13 10
141. Introduction: 111. Introduction
15 12
16The rfkill switch subsystem exists to add a generic interface to circuitry that 13The rfkill subsystem provides a generic interface to disabling any radio
17can enable or disable the signal output of a wireless *transmitter* of any 14transmitter in the system. When a transmitter is blocked, it shall not
18type. By far, the most common use is to disable radio-frequency transmitters. 15radiate any power.
19 16
20Note that disabling the signal output means that the the transmitter is to be 17The subsystem also provides the ability to react on button presses and
21made to not emit any energy when "blocked". rfkill is not about blocking data 18disable all transmitters of a certain type (or all). This is intended for
22transmissions, it is about blocking energy emission. 19situations where transmitters need to be turned off, for example on
20aircraft.
23 21
24The rfkill subsystem offers support for keys and switches often found on
25laptops to enable wireless devices like WiFi and Bluetooth, so that these keys
26and switches actually perform an action in all wireless devices of a given type
27attached to the system.
28 22
29The buttons to enable and disable the wireless transmitters are important in
30situations where the user is for example using his laptop on a location where
31radio-frequency transmitters _must_ be disabled (e.g. airplanes).
32 23
33Because of this requirement, userspace support for the keys should not be made 242. Implementation details
34mandatory. Because userspace might want to perform some additional smarter
35tasks when the key is pressed, rfkill provides userspace the possibility to
36take over the task to handle the key events.
37
38===============================================================================
392: Implementation details
40 25
41The rfkill subsystem is composed of various components: the rfkill class, the 26The rfkill subsystem is composed of various components: the rfkill class, the
42rfkill-input module (an input layer handler), and some specific input layer 27rfkill-input module (an input layer handler), and some specific input layer
43events. 28events.
44 29
45The rfkill class provides kernel drivers with an interface that allows them to 30The rfkill class is provided for kernel drivers to register their radio
46know when they should enable or disable a wireless network device transmitter. 31transmitter with the kernel, provide methods for turning it on and off and,
47This is enabled by the CONFIG_RFKILL Kconfig option. 32optionally, letting the system know about hardware-disabled states that may
48 33be implemented on the device. This code is enabled with the CONFIG_RFKILL
49The rfkill class support makes sure userspace will be notified of all state 34Kconfig option, which drivers can "select".
50changes on rfkill devices through uevents. It provides a notification chain
51for interested parties in the kernel to also get notified of rfkill state
52changes in other drivers. It creates several sysfs entries which can be used
53by userspace. See section "Userspace support".
54
55The rfkill-input module provides the kernel with the ability to implement a
56basic response when the user presses a key or button (or toggles a switch)
57related to rfkill functionality. It is an in-kernel implementation of default
58policy of reacting to rfkill-related input events and neither mandatory nor
59required for wireless drivers to operate. It is enabled by the
60CONFIG_RFKILL_INPUT Kconfig option.
61
62rfkill-input is a rfkill-related events input layer handler. This handler will
63listen to all rfkill key events and will change the rfkill state of the
64wireless devices accordingly. With this option enabled userspace could either
65do nothing or simply perform monitoring tasks.
66
67The rfkill-input module also provides EPO (emergency power-off) functionality
68for all wireless transmitters. This function cannot be overridden, and it is
69always active. rfkill EPO is related to *_RFKILL_ALL input layer events.
70
71
72Important terms for the rfkill subsystem:
73
74In order to avoid confusion, we avoid the term "switch" in rfkill when it is
75referring to an electronic control circuit that enables or disables a
76transmitter. We reserve it for the physical device a human manipulates
77(which is an input device, by the way):
78
79rfkill switch:
80
81 A physical device a human manipulates. Its state can be perceived by
82 the kernel either directly (through a GPIO pin, ACPI GPE) or by its
83 effect on a rfkill line of a wireless device.
84
85rfkill controller:
86
87 A hardware circuit that controls the state of a rfkill line, which a
88 kernel driver can interact with *to modify* that state (i.e. it has
89 either write-only or read/write access).
90
91rfkill line:
92
93 An input channel (hardware or software) of a wireless device, which
94 causes a wireless transmitter to stop emitting energy (BLOCK) when it
95 is active. Point of view is extremely important here: rfkill lines are
96 always seen from the PoV of a wireless device (and its driver).
97
98soft rfkill line/software rfkill line:
99
100 A rfkill line the wireless device driver can directly change the state
101 of. Related to rfkill_state RFKILL_STATE_SOFT_BLOCKED.
102
103hard rfkill line/hardware rfkill line:
104
105 A rfkill line that works fully in hardware or firmware, and that cannot
106 be overridden by the kernel driver. The hardware device or the
107 firmware just exports its status to the driver, but it is read-only.
108 Related to rfkill_state RFKILL_STATE_HARD_BLOCKED.
109
110The enum rfkill_state describes the rfkill state of a transmitter:
111
112When a rfkill line or rfkill controller is in the RFKILL_STATE_UNBLOCKED state,
113the wireless transmitter (radio TX circuit for example) is *enabled*. When the
114it is in the RFKILL_STATE_SOFT_BLOCKED or RFKILL_STATE_HARD_BLOCKED, the
115wireless transmitter is to be *blocked* from operating.
116
117RFKILL_STATE_SOFT_BLOCKED indicates that a call to toggle_radio() can change
118that state. RFKILL_STATE_HARD_BLOCKED indicates that a call to toggle_radio()
119will not be able to change the state and will return with a suitable error if
120attempts are made to set the state to RFKILL_STATE_UNBLOCKED.
121
122RFKILL_STATE_HARD_BLOCKED is used by drivers to signal that the device is
123locked in the BLOCKED state by a hardwire rfkill line (typically an input pin
124that, when active, forces the transmitter to be disabled) which the driver
125CANNOT override.
126
127Full rfkill functionality requires two different subsystems to cooperate: the
128input layer and the rfkill class. The input layer issues *commands* to the
129entire system requesting that devices registered to the rfkill class change
130state. The way this interaction happens is not complex, but it is not obvious
131either:
132
133Kernel Input layer:
134
135 * Generates KEY_WWAN, KEY_WLAN, KEY_BLUETOOTH, SW_RFKILL_ALL, and
136 other such events when the user presses certain keys, buttons, or
137 toggles certain physical switches.
138
139 THE INPUT LAYER IS NEVER USED TO PROPAGATE STATUS, NOTIFICATIONS OR THE
140 KIND OF STUFF AN ON-SCREEN-DISPLAY APPLICATION WOULD REPORT. It is
141 used to issue *commands* for the system to change behaviour, and these
142 commands may or may not be carried out by some kernel driver or
143 userspace application. It follows that doing user feedback based only
144 on input events is broken, as there is no guarantee that an input event
145 will be acted upon.
146
147 Most wireless communication device drivers implementing rfkill
148 functionality MUST NOT generate these events, and have no reason to
149 register themselves with the input layer. Doing otherwise is a common
150 misconception. There is an API to propagate rfkill status change
151 information, and it is NOT the input layer.
152
153rfkill class:
154
155 * Calls a hook in a driver to effectively change the wireless
156 transmitter state;
157 * Keeps track of the wireless transmitter state (with help from
158 the driver);
159 * Generates userspace notifications (uevents) and a call to a
160 notification chain (kernel) when there is a wireless transmitter
161 state change;
162 * Connects a wireless communications driver with the common rfkill
163 control system, which, for example, allows actions such as
164 "switch all bluetooth devices offline" to be carried out by
165 userspace or by rfkill-input.
166
167 THE RFKILL CLASS NEVER ISSUES INPUT EVENTS. THE RFKILL CLASS DOES
168 NOT LISTEN TO INPUT EVENTS. NO DRIVER USING THE RFKILL CLASS SHALL
169 EVER LISTEN TO, OR ACT ON RFKILL INPUT EVENTS. Doing otherwise is
170 a layering violation.
171
172 Most wireless data communication drivers in the kernel have just to
173 implement the rfkill class API to work properly. Interfacing to the
174 input layer is not often required (and is very often a *bug*) on
175 wireless drivers.
176
177 Platform drivers often have to attach to the input layer to *issue*
178 (but never to listen to) rfkill events for rfkill switches, and also to
179 the rfkill class to export a control interface for the platform rfkill
180 controllers to the rfkill subsystem. This does NOT mean the rfkill
181 switch is attached to a rfkill class (doing so is almost always wrong).
182 It just means the same kernel module is the driver for different
183 devices (rfkill switches and rfkill controllers).
184
185
186Userspace input handlers (uevents) or kernel input handlers (rfkill-input):
187
188 * Implements the policy of what should happen when one of the input
189 layer events related to rfkill operation is received.
190 * Uses the sysfs interface (userspace) or private rfkill API calls
191 to tell the devices registered with the rfkill class to change
192 their state (i.e. translates the input layer event into real
193 action).
194
195 * rfkill-input implements EPO by handling EV_SW SW_RFKILL_ALL 0
196 (power off all transmitters) in a special way: it ignores any
197 overrides and local state cache and forces all transmitters to the
198 RFKILL_STATE_SOFT_BLOCKED state (including those which are already
199 supposed to be BLOCKED).
200 * rfkill EPO will remain active until rfkill-input receives an
201 EV_SW SW_RFKILL_ALL 1 event. While the EPO is active, transmitters
202 are locked in the blocked state (rfkill will refuse to unblock them).
203 * rfkill-input implements different policies that the user can
204 select for handling EV_SW SW_RFKILL_ALL 1. It will unlock rfkill,
205 and either do nothing (leave transmitters blocked, but now unlocked),
206 restore the transmitters to their state before the EPO, or unblock
207 them all.
208
209Userspace uevent handler or kernel platform-specific drivers hooked to the
210rfkill notifier chain:
211
212 * Taps into the rfkill notifier chain or to KOBJ_CHANGE uevents,
213 in order to know when a device that is registered with the rfkill
214 class changes state;
215 * Issues feedback notifications to the user;
216 * In the rare platforms where this is required, synthesizes an input
217 event to command all *OTHER* rfkill devices to also change their
218 statues when a specific rfkill device changes state.
219
220
221===============================================================================
2223: Kernel driver guidelines
223
224Remember: point-of-view is everything for a driver that connects to the rfkill
225subsystem. All the details below must be measured/perceived from the point of
226view of the specific driver being modified.
227
228The first thing one needs to know is whether his driver should be talking to
229the rfkill class or to the input layer. In rare cases (platform drivers), it
230could happen that you need to do both, as platform drivers often handle a
231variety of devices in the same driver.
232
233Do not mistake input devices for rfkill controllers. The only type of "rfkill
234switch" device that is to be registered with the rfkill class are those
235directly controlling the circuits that cause a wireless transmitter to stop
236working (or the software equivalent of them), i.e. what we call a rfkill
237controller. Every other kind of "rfkill switch" is just an input device and
238MUST NOT be registered with the rfkill class.
239
240A driver should register a device with the rfkill class when ALL of the
241following conditions are met (they define a rfkill controller):
242
2431. The device is/controls a data communications wireless transmitter;
244
2452. The kernel can interact with the hardware/firmware to CHANGE the wireless
246 transmitter state (block/unblock TX operation);
247
2483. The transmitter can be made to not emit any energy when "blocked":
249 rfkill is not about blocking data transmissions, it is about blocking
250 energy emission;
251
252A driver should register a device with the input subsystem to issue
253rfkill-related events (KEY_WLAN, KEY_BLUETOOTH, KEY_WWAN, KEY_WIMAX,
254SW_RFKILL_ALL, etc) when ALL of the folowing conditions are met:
255
2561. It is directly related to some physical device the user interacts with, to
257 command the O.S./firmware/hardware to enable/disable a data communications
258 wireless transmitter.
259
260 Examples of the physical device are: buttons, keys and switches the user
261 will press/touch/slide/switch to enable or disable the wireless
262 communication device.
263
2642. It is NOT slaved to another device, i.e. there is no other device that
265 issues rfkill-related input events in preference to this one.
266
267 Please refer to the corner cases and examples section for more details.
268
269When in doubt, do not issue input events. For drivers that should generate
270input events in some platforms, but not in others (e.g. b43), the best solution
271is to NEVER generate input events in the first place. That work should be
272deferred to a platform-specific kernel module (which will know when to generate
273events through the rfkill notifier chain) or to userspace. This avoids the
274usual maintenance problems with DMI whitelisting.
275
276
277Corner cases and examples:
278====================================
279
2801. If the device is an input device that, because of hardware or firmware,
281causes wireless transmitters to be blocked regardless of the kernel's will, it
282is still just an input device, and NOT to be registered with the rfkill class.
283
2842. If the wireless transmitter switch control is read-only, it is an input
285device and not to be registered with the rfkill class (and maybe not to be made
286an input layer event source either, see below).
287
2883. If there is some other device driver *closer* to the actual hardware the
289user interacted with (the button/switch/key) to issue an input event, THAT is
290the device driver that should be issuing input events.
291
292E.g:
293 [RFKILL slider switch] -- [GPIO hardware] -- [WLAN card rf-kill input]
294 (platform driver) (wireless card driver)
295
296The user is closer to the RFKILL slide switch plaform driver, so the driver
297which must issue input events is the platform driver looking at the GPIO
298hardware, and NEVER the wireless card driver (which is just a slave). It is
299very likely that there are other leaves than just the WLAN card rf-kill input
300(e.g. a bluetooth card, etc)...
301
302On the other hand, some embedded devices do this:
303
304 [RFKILL slider switch] -- [WLAN card rf-kill input]
305 (wireless card driver)
306
307In this situation, the wireless card driver *could* register itself as an input
308device and issue rf-kill related input events... but in order to AVOID the need
309for DMI whitelisting, the wireless card driver does NOT do it. Userspace (HAL)
310or a platform driver (that exists only on these embedded devices) will do the
311dirty job of issuing the input events.
312
313
314COMMON MISTAKES in kernel drivers, related to rfkill:
315====================================
316
3171. NEVER confuse input device keys and buttons with input device switches.
318
319 1a. Switches are always set or reset. They report the current state
320 (on position or off position).
321
322 1b. Keys and buttons are either in the pressed or not-pressed state, and
323 that's it. A "button" that latches down when you press it, and
324 unlatches when you press it again is in fact a switch as far as input
325 devices go.
326
327Add the SW_* events you need for switches, do NOT try to emulate a button using
328KEY_* events just because there is no such SW_* event yet. Do NOT try to use,
329for example, KEY_BLUETOOTH when you should be using SW_BLUETOOTH instead.
330
3312. Input device switches (sources of EV_SW events) DO store their current state
332(so you *must* initialize it by issuing a gratuitous input layer event on
333driver start-up and also when resuming from sleep), and that state CAN be
334queried from userspace through IOCTLs. There is no sysfs interface for this,
335but that doesn't mean you should break things trying to hook it to the rfkill
336class to get a sysfs interface :-)
337
3383. Do not issue *_RFKILL_ALL events by default, unless you are sure it is the
339correct event for your switch/button. These events are emergency power-off
340events when they are trying to turn the transmitters off. An example of an
341input device which SHOULD generate *_RFKILL_ALL events is the wireless-kill
342switch in a laptop which is NOT a hotkey, but a real sliding/rocker switch.
343An example of an input device which SHOULD NOT generate *_RFKILL_ALL events by
344default, is any sort of hot key that is type-specific (e.g. the one for WLAN).
345
346
3473.1 Guidelines for wireless device drivers
348------------------------------------------
349
350(in this text, rfkill->foo means the foo field of struct rfkill).
351
3521. Each independent transmitter in a wireless device (usually there is only one
353transmitter per device) should have a SINGLE rfkill class attached to it.
354
3552. If the device does not have any sort of hardware assistance to allow the
356driver to rfkill the device, the driver should emulate it by taking all actions
357required to silence the transmitter.
358
3593. If it is impossible to silence the transmitter (i.e. it still emits energy,
360even if it is just in brief pulses, when there is no data to transmit and there
361is no hardware support to turn it off) do NOT lie to the users. Do not attach
362it to a rfkill class. The rfkill subsystem does not deal with data
363transmission, it deals with energy emission. If the transmitter is emitting
364energy, it is not blocked in rfkill terms.
365
3664. It doesn't matter if the device has multiple rfkill input lines affecting
367the same transmitter, their combined state is to be exported as a single state
368per transmitter (see rule 1).
369
370This rule exists because users of the rfkill subsystem expect to get (and set,
371when possible) the overall transmitter rfkill state, not of a particular rfkill
372line.
373
3745. The wireless device driver MUST NOT leave the transmitter enabled during
375suspend and hibernation unless:
376 35
377 5.1. The transmitter has to be enabled for some sort of functionality 36The rfkill class code also notifies userspace of state changes, this is
378 like wake-on-wireless-packet or autonomous packed forwarding in a mesh 37achieved via uevents. It also provides some sysfs files for userspace to
379 network, and that functionality is enabled for this suspend/hibernation 38check the status of radio transmitters. See the "Userspace support" section
380 cycle. 39below.
381 40
382AND
383 41
384 5.2. The device was not on a user-requested BLOCKED state before 42The rfkill-input code implements a basic response to rfkill buttons -- it
385 the suspend (i.e. the driver must NOT unblock a device, not even 43implements turning on/off all devices of a certain class (or all).
386 to support wake-on-wireless-packet or remain in the mesh).
387 44
388In other words, there is absolutely no allowed scenario where a driver can 45When the device is hard-blocked (either by a call to rfkill_set_hw_state()
389automatically take action to unblock a rfkill controller (obviously, this deals 46or from query_hw_block) set_block() will be invoked but drivers can well
390with scenarios where soft-blocking or both soft and hard blocking is happening. 47ignore the method call since they can use the return value of the function
391Scenarios where hardware rfkill lines are the only ones blocking the 48rfkill_set_hw_state() to sync the software state instead of keeping track
392transmitter are outside of this rule, since the wireless device driver does not 49of calls to set_block().
393control its input hardware rfkill lines in the first place).
394 50
3956. During resume, rfkill will try to restore its previous state.
396 51
3977. After a rfkill class is suspended, it will *not* call rfkill->toggle_radio 52The entire functionality is spread over more than one subsystem:
398until it is resumed.
399 53
54 * The kernel input layer generates KEY_WWAN, KEY_WLAN etc. and
55 SW_RFKILL_ALL -- when the user presses a button. Drivers for radio
56 transmitters generally do not register to the input layer, unless the
57 device really provides an input device (i.e. a button that has no
58 effect other than generating a button press event)
400 59
401Example of a WLAN wireless driver connected to the rfkill subsystem: 60 * The rfkill-input code hooks up to these events and switches the soft-block
402-------------------------------------------------------------------- 61 of the various radio transmitters, depending on the button type.
403 62
404A certain WLAN card has one input pin that causes it to block the transmitter 63 * The rfkill drivers turn off/on their transmitters as requested.
405and makes the status of that input pin available (only for reading!) to the
406kernel driver. This is a hard rfkill input line (it cannot be overridden by
407the kernel driver).
408 64
409The card also has one PCI register that, if manipulated by the driver, causes 65 * The rfkill class will generate userspace notifications (uevents) to tell
410it to block the transmitter. This is a soft rfkill input line. 66 userspace what the current state is.
411 67
412It has also a thermal protection circuitry that shuts down its transmitter if
413the card overheats, and makes the status of that protection available (only for
414reading!) to the kernel driver. This is also a hard rfkill input line.
415 68
416If either one of these rfkill lines are active, the transmitter is blocked by
417the hardware and forced offline.
418 69
419The driver should allocate and attach to its struct device *ONE* instance of 703. Kernel driver guidelines
420the rfkill class (there is only one transmitter).
421 71
422It can implement the get_state() hook, and return RFKILL_STATE_HARD_BLOCKED if
423either one of its two hard rfkill input lines are active. If the two hard
424rfkill lines are inactive, it must return RFKILL_STATE_SOFT_BLOCKED if its soft
425rfkill input line is active. Only if none of the rfkill input lines are
426active, will it return RFKILL_STATE_UNBLOCKED.
427 72
428Since the device has a hardware rfkill line, it IS subject to state changes 73Drivers for radio transmitters normally implement only the rfkill class.
429external to rfkill. Therefore, the driver must make sure that it calls 74These drivers may not unblock the transmitter based on own decisions, they
430rfkill_force_state() to keep the status always up-to-date, and it must do a 75should act on information provided by the rfkill class only.
431rfkill_force_state() on resume from sleep.
432 76
433Every time the driver gets a notification from the card that one of its rfkill 77Platform drivers might implement input devices if the rfkill button is just
434lines changed state (polling might be needed on badly designed cards that don't 78that, a button. If that button influences the hardware then you need to
435generate interrupts for such events), it recomputes the rfkill state as per 79implement an rfkill class instead. This also applies if the platform provides
436above, and calls rfkill_force_state() to update it. 80a way to turn on/off the transmitter(s).
437 81
438The driver should implement the toggle_radio() hook, that: 82During suspend/hibernation, transmitters should only be left enabled when
83wake-on wlan or similar functionality requires it and the device wasn't
84blocked before suspend/hibernate. Note that it may be necessary to update
85the rfkill subsystem's idea of what the current state is at resume time if
86the state may have changed over suspend.
439 87
4401. Returns an error if one of the hardware rfkill lines are active, and the
441caller asked for RFKILL_STATE_UNBLOCKED.
442 88
4432. Activates the soft rfkill line if the caller asked for state
444RFKILL_STATE_SOFT_BLOCKED. It should do this even if one of the hard rfkill
445lines are active, effectively double-blocking the transmitter.
446 89
4473. Deactivates the soft rfkill line if none of the hardware rfkill lines are 904. Kernel API
448active and the caller asked for RFKILL_STATE_UNBLOCKED.
449
450===============================================================================
4514: Kernel API
452 91
453To build a driver with rfkill subsystem support, the driver should depend on 92To build a driver with rfkill subsystem support, the driver should depend on
454(or select) the Kconfig symbol RFKILL; it should _not_ depend on RKFILL_INPUT. 93(or select) the Kconfig symbol RFKILL.
455 94
456The hardware the driver talks to may be write-only (where the current state 95The hardware the driver talks to may be write-only (where the current state
457of the hardware is unknown), or read-write (where the hardware can be queried 96of the hardware is unknown), or read-write (where the hardware can be queried
458about its current state). 97about its current state).
459 98
460The rfkill class will call the get_state hook of a device every time it needs 99Calling rfkill_set_hw_state() when a state change happens is required from
461to know the *real* current state of the hardware. This can happen often, but 100rfkill drivers that control devices that can be hard-blocked unless they also
462it does not do any polling, so it is not enough on hardware that is subject 101assign the poll_hw_block() callback (then the rfkill core will poll the
463to state changes outside of the rfkill subsystem. 102device). Don't do this unless you cannot get the event in any other way.
464
465Therefore, calling rfkill_force_state() when a state change happens is
466mandatory when the device has a hardware rfkill line, or when something else
467like the firmware could cause its state to be changed without going through the
468rfkill class.
469
470Some hardware provides events when its status changes. In these cases, it is
471best for the driver to not provide a get_state hook, and instead register the
472rfkill class *already* with the correct status, and keep it updated using
473rfkill_force_state() when it gets an event from the hardware.
474
475rfkill_force_state() must be used on the device resume handlers to update the
476rfkill status, should there be any chance of the device status changing during
477the sleep.
478
479There is no provision for a statically-allocated rfkill struct. You must
480use rfkill_allocate() to allocate one.
481
482You should:
483 - rfkill_allocate()
484 - modify rfkill fields (flags, name)
485 - modify state to the current hardware state (THIS IS THE ONLY TIME
486 YOU CAN ACCESS state DIRECTLY)
487 - rfkill_register()
488
489The only way to set a device to the RFKILL_STATE_HARD_BLOCKED state is through
490a suitable return of get_state() or through rfkill_force_state().
491 103
492When a device is in the RFKILL_STATE_HARD_BLOCKED state, the only way to switch
493it to a different state is through a suitable return of get_state() or through
494rfkill_force_state().
495 104
496If toggle_radio() is called to set a device to state RFKILL_STATE_SOFT_BLOCKED
497when that device is already at the RFKILL_STATE_HARD_BLOCKED state, it should
498not return an error. Instead, it should try to double-block the transmitter,
499so that its state will change from RFKILL_STATE_HARD_BLOCKED to
500RFKILL_STATE_SOFT_BLOCKED should the hardware blocking cease.
501
502Please refer to the source for more documentation.
503
504===============================================================================
5055: Userspace support
506
507rfkill devices issue uevents (with an action of "change"), with the following
508environment variables set:
509
510RFKILL_NAME
511RFKILL_STATE
512RFKILL_TYPE
513 105
514The ABI for these variables is defined by the sysfs attributes. It is best 1065. Userspace support
515to take a quick look at the source to make sure of the possible values.
516 107
517It is expected that HAL will trap those, and bridge them to DBUS, etc. These 108The following sysfs entries exist for every rfkill device:
518events CAN and SHOULD be used to give feedback to the user about the rfkill
519status of the system.
520
521Input devices may issue events that are related to rfkill. These are the
522various KEY_* events and SW_* events supported by rfkill-input.c.
523
524******IMPORTANT******
525When rfkill-input is ACTIVE, userspace is NOT TO CHANGE THE STATE OF AN RFKILL
526SWITCH IN RESPONSE TO AN INPUT EVENT also handled by rfkill-input, unless it
527has set to true the user_claim attribute for that particular switch. This rule
528is *absolute*; do NOT violate it.
529******IMPORTANT******
530
531Userspace must not assume it is the only source of control for rfkill switches.
532Their state CAN and WILL change due to firmware actions, direct user actions,
533and the rfkill-input EPO override for *_RFKILL_ALL.
534
535When rfkill-input is not active, userspace must initiate a rfkill status
536change by writing to the "state" attribute in order for anything to happen.
537
538Take particular care to implement EV_SW SW_RFKILL_ALL properly. When that
539switch is set to OFF, *every* rfkill device *MUST* be immediately put into the
540RFKILL_STATE_SOFT_BLOCKED state, no questions asked.
541
542The following sysfs entries will be created:
543 109
544 name: Name assigned by driver to this key (interface or driver name). 110 name: Name assigned by driver to this key (interface or driver name).
545 type: Name of the key type ("wlan", "bluetooth", etc). 111 type: Name of the key type ("wlan", "bluetooth", etc).
546 state: Current state of the transmitter 112 state: Current state of the transmitter
547 0: RFKILL_STATE_SOFT_BLOCKED 113 0: RFKILL_STATE_SOFT_BLOCKED
548 transmitter is forced off, but one can override it 114 transmitter is turned off by software
549 by a write to the state attribute;
550 1: RFKILL_STATE_UNBLOCKED 115 1: RFKILL_STATE_UNBLOCKED
551 transmiter is NOT forced off, and may operate if 116 transmitter is (potentially) active
552 all other conditions for such operation are met
553 (such as interface is up and configured, etc);
554 2: RFKILL_STATE_HARD_BLOCKED 117 2: RFKILL_STATE_HARD_BLOCKED
555 transmitter is forced off by something outside of 118 transmitter is forced off by something outside of
556 the driver's control. One cannot set a device to 119 the driver's control.
557 this state through writes to the state attribute; 120 claim: 0: Kernel handles events (currently always reads that value)
558 claim: 1: Userspace handles events, 0: Kernel handles events 121
559 122rfkill devices also issue uevents (with an action of "change"), with the
560Both the "state" and "claim" entries are also writable. For the "state" entry 123following environment variables set:
561this means that when 1 or 0 is written, the device rfkill state (if not yet in 124
562the requested state), will be will be toggled accordingly. 125RFKILL_NAME
563 126RFKILL_STATE
564For the "claim" entry writing 1 to it means that the kernel no longer handles 127RFKILL_TYPE
565key events even though RFKILL_INPUT input was enabled. When "claim" has been 128
566set to 0, userspace should make sure that it listens for the input events or 129The contents of these variables corresponds to the "name", "state" and
567check the sysfs "state" entry regularly to correctly perform the required tasks 130"type" sysfs files explained above.
568when the rkfill key is pressed. 131
569 132An alternative userspace interface exists as a misc device /dev/rfkill,
570A note about input devices and EV_SW events: 133which allows userspace to obtain and set the state of rfkill devices and
571 134sets of devices. It also notifies userspace about device addition and
572In order to know the current state of an input device switch (like 135removal. The API is a simple read/write API that is defined in
573SW_RFKILL_ALL), you will need to use an IOCTL. That information is not 136linux/rfkill.h.
574available through sysfs in a generic way at this time, and it is not available
575through the rfkill class AT ALL.
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index 10711d9f0788..1eb576a023bd 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -1984,7 +1984,7 @@ break *$pc
1984 1984
1985break *0x400618 1985break *0x400618
1986 1986
1987heres a really useful one for large programs 1987Here's a really useful one for large programs
1988rbr 1988rbr
1989Set a breakpoint for all functions matching REGEXP 1989Set a breakpoint for all functions matching REGEXP
1990e.g. 1990e.g.
@@ -2211,7 +2211,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
2211#5 0x51692c in readline_internal () at readline.c:521 2211#5 0x51692c in readline_internal () at readline.c:521
2212#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") 2212#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ")
2213 at readline.c:349 2213 at readline.c:349
2214#7 0x4d7a8a in command_line_input (prrompt=0x564420 "(gdb) ", repeat=1, 2214#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
2215 annotation_suffix=0x4d6b44 "prompt") at top.c:2091 2215 annotation_suffix=0x4d6b44 "prompt") at top.c:2091
2216#8 0x4d6cf0 in command_loop () at top.c:1345 2216#8 0x4d6cf0 in command_loop () at top.c:1345
2217#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635 2217#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt
index e2bae5a577e3..3ac1e46d5365 100644
--- a/Documentation/scheduler/sched-nice-design.txt
+++ b/Documentation/scheduler/sched-nice-design.txt
@@ -55,7 +55,7 @@ To sum it up: we always wanted to make nice levels more consistent, but
55within the constraints of HZ and jiffies and their nasty design level 55within the constraints of HZ and jiffies and their nasty design level
56coupling to timeslices and granularity it was not really viable. 56coupling to timeslices and granularity it was not really viable.
57 57
58The second (less frequent but still periodically occuring) complaint 58The second (less frequent but still periodically occurring) complaint
59about Linux's nice level support was its assymetry around the origo 59about Linux's nice level support was its assymetry around the origo
60(which you can see demonstrated in the picture above), or more 60(which you can see demonstrated in the picture above), or more
61accurately: the fact that nice level behavior depended on the _absolute_ 61accurately: the fact that nice level behavior depended on the _absolute_
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 5ba4d3fc625a..1df7f9cdab05 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -4,6 +4,7 @@
4CONTENTS 4CONTENTS
5======== 5========
6 6
70. WARNING
71. Overview 81. Overview
8 1.1 The problem 9 1.1 The problem
9 1.2 The solution 10 1.2 The solution
@@ -14,6 +15,23 @@ CONTENTS
143. Future plans 153. Future plans
15 16
16 17
180. WARNING
19==========
20
21 Fiddling with these settings can result in an unstable system, the knobs are
22 root only and assumes root knows what he is doing.
23
24Most notable:
25
26 * very small values in sched_rt_period_us can result in an unstable
27 system when the period is smaller than either the available hrtimer
28 resolution, or the time it takes to handle the budget refresh itself.
29
30 * very small values in sched_rt_runtime_us can result in an unstable
31 system when the runtime is so small the system has difficulty making
32 forward progress (NOTE: the migration thread and kstopmachine both
33 are real-time processes).
34
171. Overview 351. Overview
18=========== 36===========
19 37
@@ -169,7 +187,7 @@ get their allocated time.
169 187
170Implementing SCHED_EDF might take a while to complete. Priority Inheritance is 188Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
171the biggest challenge as the current linux PI infrastructure is geared towards 189the biggest challenge as the current linux PI infrastructure is geared towards
172the limited static priority levels 0-139. With deadline scheduling you need to 190the limited static priority levels 0-99. With deadline scheduling you need to
173do deadline inheritance (since priority is inversely proportional to the 191do deadline inheritance (since priority is inversely proportional to the
174deadline delta (deadline - now). 192deadline delta (deadline - now).
175 193
diff --git a/Documentation/scsi/aic79xx.txt b/Documentation/scsi/aic79xx.txt
index 683ccae00ad4..c014eccaf19f 100644
--- a/Documentation/scsi/aic79xx.txt
+++ b/Documentation/scsi/aic79xx.txt
@@ -194,7 +194,7 @@ The following information is available in this file:
194 - Packetized SCSI Protocol at 160MB/s and 320MB/s 194 - Packetized SCSI Protocol at 160MB/s and 320MB/s
195 - Quick Arbitration Selection (QAS) 195 - Quick Arbitration Selection (QAS)
196 - Retained Training Information (Rev B. ASIC only) 196 - Retained Training Information (Rev B. ASIC only)
197 - Interrupt Coalessing 197 - Interrupt Coalescing
198 - Initiator Mode (target mode not currently 198 - Initiator Mode (target mode not currently
199 supported) 199 supported)
200 - Support for the PCI-X standard up to 133MHz 200 - Support for the PCI-X standard up to 133MHz
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index 230e30846ef2..08e2b4d04aab 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -206,7 +206,7 @@ of MOVE MEMORY instructions.
206The 896 and the 895A allows handling of the phase mismatch context from 206The 896 and the 895A allows handling of the phase mismatch context from
207SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor 207SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor
208until the C code has saved the context of the transfer). 208until the C code has saved the context of the transfer).
209Implementing this without using LOAD/STORE instructions would be painfull 209Implementing this without using LOAD/STORE instructions would be painful
210and I didn't even want to try it. 210and I didn't even want to try it.
211 211
212The 896 chip supports 64 bit PCI transactions and addressing, while the 212The 896 chip supports 64 bit PCI transactions and addressing, while the
@@ -240,7 +240,7 @@ characteristics. This feature may also reduce average command latency.
240In order to really gain advantage of this feature, devices must have 240In order to really gain advantage of this feature, devices must have
241a reasonable cache size (No miracle is to be expected for a low-end 241a reasonable cache size (No miracle is to be expected for a low-end
242hard disk with 128 KB or less). 242hard disk with 128 KB or less).
243Some kown SCSI devices do not properly support tagged command queuing. 243Some known SCSI devices do not properly support tagged command queuing.
244Generally, firmware revisions that fix this kind of problems are available 244Generally, firmware revisions that fix this kind of problems are available
245at respective vendor web/ftp sites. 245at respective vendor web/ftp sites.
246All I can say is that the hard disks I use on my machines behave well with 246All I can say is that the hard disks I use on my machines behave well with
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt
index 49ea5c58c6bc..eb9a7b905b64 100644
--- a/Documentation/scsi/sym53c8xx_2.txt
+++ b/Documentation/scsi/sym53c8xx_2.txt
@@ -206,7 +206,7 @@ characteristics. This feature may also reduce average command latency.
206In order to really gain advantage of this feature, devices must have 206In order to really gain advantage of this feature, devices must have
207a reasonable cache size (No miracle is to be expected for a low-end 207a reasonable cache size (No miracle is to be expected for a low-end
208hard disk with 128 KB or less). 208hard disk with 128 KB or less).
209Some kown old SCSI devices do not properly support tagged command queuing. 209Some known old SCSI devices do not properly support tagged command queuing.
210Generally, firmware revisions that fix this kind of problems are available 210Generally, firmware revisions that fix this kind of problems are available
211at respective vendor web/ftp sites. 211at respective vendor web/ftp sites.
212All I can say is that I never have had problem with tagged queuing using 212All I can say is that I never have had problem with tagged queuing using
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 012858d2b119..4252697a95d6 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -460,6 +460,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
460 460
461 The power-management is supported. 461 The power-management is supported.
462 462
463 Module snd-ctxfi
464 ----------------
465
466 Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips)
467 * Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series
468 * Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
469 * Creative Sound Blaster X-Fi Titanium Professional Audio
470 * Creative Sound Blaster X-Fi Titanium
471 * Creative Sound Blaster X-Fi Elite Pro
472 * Creative Sound Blaster X-Fi Platinum
473 * Creative Sound Blaster X-Fi Fatal1ty
474 * Creative Sound Blaster X-Fi XtremeGamer
475 * Creative Sound Blaster X-Fi XtremeMusic
476
477 reference_rate - reference sample rate, 44100 or 48000 (default)
478 multiple - multiple to ref. sample rate, 1 or 2 (default)
479
480 This module supports multiple cards.
481
463 Module snd-darla20 482 Module snd-darla20
464 ------------------ 483 ------------------
465 484
@@ -754,7 +773,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
754 single_cmd - Use single immediate commands to communicate with 773 single_cmd - Use single immediate commands to communicate with
755 codecs (for debugging only) 774 codecs (for debugging only)
756 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off) 775 enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
757 power_save - Automatic power-saving timtout (in second, 0 = 776 power_save - Automatic power-saving timeout (in second, 0 =
758 disable) 777 disable)
759 power_save_controller - Reset HD-audio controller in power-saving mode 778 power_save_controller - Reset HD-audio controller in power-saving mode
760 (default = on) 779 (default = on)
@@ -925,6 +944,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
925 * Onkyo SE-90PCI 944 * Onkyo SE-90PCI
926 * Onkyo SE-200PCI 945 * Onkyo SE-200PCI
927 * ESI Juli@ 946 * ESI Juli@
947 * ESI Maya44
928 * Hercules Fortissimo IV 948 * Hercules Fortissimo IV
929 * EGO-SYS WaveTerminal 192M 949 * EGO-SYS WaveTerminal 192M
930 950
@@ -933,7 +953,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
933 prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192, 953 prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192,
934 juli, aureon51, aureon71, universe, ap192, k8x800, 954 juli, aureon51, aureon71, universe, ap192, k8x800,
935 phase22, phase28, ms300, av710, se200pci, se90pci, 955 phase22, phase28, ms300, av710, se200pci, se90pci,
936 fortissimo4, sn25p, WT192M 956 fortissimo4, sn25p, WT192M, maya44
937 957
938 This module supports multiple cards and autoprobe. 958 This module supports multiple cards and autoprobe.
939 959
@@ -1093,6 +1113,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1093 This module supports multiple cards. 1113 This module supports multiple cards.
1094 The driver requires the firmware loader support on kernel. 1114 The driver requires the firmware loader support on kernel.
1095 1115
1116 Module snd-lx6464es
1117 -------------------
1118
1119 Module for Digigram LX6464ES boards
1120
1121 This module supports multiple cards.
1122
1096 Module snd-maestro3 1123 Module snd-maestro3
1097 ------------------- 1124 -------------------
1098 1125
@@ -1543,13 +1570,15 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1543 Module snd-sc6000 1570 Module snd-sc6000
1544 ----------------- 1571 -----------------
1545 1572
1546 Module for Gallant SC-6000 soundcard. 1573 Module for Gallant SC-6000 soundcard and later models: SC-6600
1574 and SC-7000.
1547 1575
1548 port - Port # (0x220 or 0x240) 1576 port - Port # (0x220 or 0x240)
1549 mss_port - MSS Port # (0x530 or 0xe80) 1577 mss_port - MSS Port # (0x530 or 0xe80)
1550 irq - IRQ # (5,7,9,10,11) 1578 irq - IRQ # (5,7,9,10,11)
1551 mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq 1579 mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq
1552 dma - DMA # (1,3,0) 1580 dma - DMA # (1,3,0)
1581 joystick - Enable gameport - 0 = disable (default), 1 = enable
1553 1582
1554 This module supports multiple cards. 1583 This module supports multiple cards.
1555 1584
@@ -1859,7 +1888,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1859 ------------------- 1888 -------------------
1860 1889
1861 Module for sound cards based on the Asus AV100/AV200 chips, 1890 Module for sound cards based on the Asus AV100/AV200 chips,
1862 i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), and Essence STX. 1891 i.e., Xonar D1, DX, D2, D2X, HDAV1.3 (Deluxe), Essence ST
1892 (Deluxe) and Essence STX.
1863 1893
1864 This module supports autoprobe and multiple cards. 1894 This module supports autoprobe and multiple cards.
1865 1895
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 322869fc8a9e..de8e10a94103 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -36,6 +36,7 @@ ALC260
36 acer Acer TravelMate 36 acer Acer TravelMate
37 will Will laptops (PB V7900) 37 will Will laptops (PB V7900)
38 replacer Replacer 672V 38 replacer Replacer 672V
39 favorit100 Maxdata Favorit 100XS
39 basic fixed pin assignment (old default model) 40 basic fixed pin assignment (old default model)
40 test for testing/debugging purpose, almost all controls can 41 test for testing/debugging purpose, almost all controls can
41 adjusted. Appearing only when compiled with 42 adjusted. Appearing only when compiled with
@@ -85,10 +86,11 @@ ALC269
85 eeepc-p703 ASUS Eeepc P703 P900A 86 eeepc-p703 ASUS Eeepc P703 P900A
86 eeepc-p901 ASUS Eeepc P901 S101 87 eeepc-p901 ASUS Eeepc P901 S101
87 fujitsu FSC Amilo 88 fujitsu FSC Amilo
89 lifebook Fujitsu Lifebook S6420
88 auto auto-config reading BIOS (default) 90 auto auto-config reading BIOS (default)
89 91
90ALC662/663 92ALC662/663/272
91========== 93==============
92 3stack-dig 3-stack (2-channel) with SPDIF 94 3stack-dig 3-stack (2-channel) with SPDIF
93 3stack-6ch 3-stack (6-channel) 95 3stack-6ch 3-stack (6-channel)
94 3stack-6ch-dig 3-stack (6-channel) with SPDIF 96 3stack-6ch-dig 3-stack (6-channel) with SPDIF
@@ -107,6 +109,9 @@ ALC662/663
107 asus-mode4 ASUS 109 asus-mode4 ASUS
108 asus-mode5 ASUS 110 asus-mode5 ASUS
109 asus-mode6 ASUS 111 asus-mode6 ASUS
112 dell Dell with ALC272
113 dell-zm1 Dell ZM1 with ALC272
114 samsung-nc10 Samsung NC10 mini notebook
110 auto auto-config reading BIOS (default) 115 auto auto-config reading BIOS (default)
111 116
112ALC882/885 117ALC882/885
@@ -118,6 +123,7 @@ ALC882/885
118 asus-a7j ASUS A7J 123 asus-a7j ASUS A7J
119 asus-a7m ASUS A7M 124 asus-a7m ASUS A7M
120 macpro MacPro support 125 macpro MacPro support
126 mb5 Macbook 5,1
121 mbp3 Macbook Pro rev3 127 mbp3 Macbook Pro rev3
122 imac24 iMac 24'' with jack detection 128 imac24 iMac 24'' with jack detection
123 w2jc ASUS W2JC 129 w2jc ASUS W2JC
@@ -133,10 +139,12 @@ ALC883/888
133 acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc) 139 acer Acer laptops (Travelmate 3012WTMi, Aspire 5600, etc)
134 acer-aspire Acer Aspire 9810 140 acer-aspire Acer Aspire 9810
135 acer-aspire-4930g Acer Aspire 4930G 141 acer-aspire-4930g Acer Aspire 4930G
142 acer-aspire-8930g Acer Aspire 8930G
136 medion Medion Laptops 143 medion Medion Laptops
137 medion-md2 Medion MD2 144 medion-md2 Medion MD2
138 targa-dig Targa/MSI 145 targa-dig Targa/MSI
139 targa-2ch-dig Targs/MSI with 2-channel 146 targa-2ch-dig Targa/MSI with 2-channel
147 targa-8ch-dig Targa/MSI with 8-channel (MSI GX620)
140 laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE) 148 laptop-eapd 3-jack with SPDIF I/O and EAPD (Clevo M540JE, M550JE)
141 lenovo-101e Lenovo 101E 149 lenovo-101e Lenovo 101E
142 lenovo-nb0763 Lenovo NB0763 150 lenovo-nb0763 Lenovo NB0763
@@ -150,6 +158,9 @@ ALC883/888
150 fujitsu-pi2515 Fujitsu AMILO Pi2515 158 fujitsu-pi2515 Fujitsu AMILO Pi2515
151 fujitsu-xa3530 Fujitsu AMILO XA3530 159 fujitsu-xa3530 Fujitsu AMILO XA3530
152 3stack-6ch-intel Intel DG33* boards 160 3stack-6ch-intel Intel DG33* boards
161 asus-p5q ASUS P5Q-EM boards
162 mb31 MacBook 3,1
163 sony-vaio-tt Sony VAIO TT
153 auto auto-config reading BIOS (default) 164 auto auto-config reading BIOS (default)
154 165
155ALC861/660 166ALC861/660
@@ -348,6 +359,7 @@ STAC92HD71B*
348 hp-m4 HP mini 1000 359 hp-m4 HP mini 1000
349 hp-dv5 HP dv series 360 hp-dv5 HP dv series
350 hp-hdx HP HDX series 361 hp-hdx HP HDX series
362 hp-dv4-1222nr HP dv4-1222nr (with LED support)
351 auto BIOS setup (default) 363 auto BIOS setup (default)
352 364
353STAC92HD73* 365STAC92HD73*
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index 88b7433d2f11..71ac995b1915 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -16,7 +16,7 @@ methods for the HD-audio hardware.
16The HD-audio component consists of two parts: the controller chip and 16The HD-audio component consists of two parts: the controller chip and
17the codec chips on the HD-audio bus. Linux provides a single driver 17the codec chips on the HD-audio bus. Linux provides a single driver
18for all controllers, snd-hda-intel. Although the driver name contains 18for all controllers, snd-hda-intel. Although the driver name contains
19a word of a well-known harware vendor, it's not specific to it but for 19a word of a well-known hardware vendor, it's not specific to it but for
20all controller chips by other companies. Since the HD-audio 20all controller chips by other companies. Since the HD-audio
21controllers are supposed to be compatible, the single snd-hda-driver 21controllers are supposed to be compatible, the single snd-hda-driver
22should work in most cases. But, not surprisingly, there are known 22should work in most cases. But, not surprisingly, there are known
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
index cfac20cf9e33..381908d8ca42 100644
--- a/Documentation/sound/alsa/Procfile.txt
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -88,26 +88,34 @@ card*/pcm*/info
88 substreams, etc. 88 substreams, etc.
89 89
90card*/pcm*/xrun_debug 90card*/pcm*/xrun_debug
91 This file appears when CONFIG_SND_DEBUG=y. 91 This file appears when CONFIG_SND_DEBUG=y and
92 This shows the status of xrun (= buffer overrun/xrun) debug of 92 CONFIG_PCM_XRUN_DEBUG=y.
93 ALSA PCM middle layer, as an integer from 0 to 2. The value 93 This shows the status of xrun (= buffer overrun/xrun) and
94 can be changed by writing to this file, such as 94 invalid PCM position debug/check of ALSA PCM middle layer.
95 95 It takes an integer value, can be changed by writing to this
96 # cat 2 > /proc/asound/card0/pcm0p/xrun_debug 96 file, such as
97 97
98 When this value is greater than 0, the driver will show the 98 # cat 5 > /proc/asound/card0/pcm0p/xrun_debug
99 messages to kernel log when an xrun is detected. The debug 99
100 message is shown also when the invalid H/W pointer is detected 100 The value consists of the following bit flags:
101 at the update of periods (usually called from the interrupt 101 bit 0 = Enable XRUN/jiffies debug messages
102 bit 1 = Show stack trace at XRUN / jiffies check
103 bit 2 = Enable additional jiffies check
104
105 When the bit 0 is set, the driver will show the messages to
106 kernel log when an xrun is detected. The debug message is
107 shown also when the invalid H/W pointer is detected at the
108 update of periods (usually called from the interrupt
102 handler). 109 handler).
103 110
104 When this value is greater than 1, the driver will show the 111 When the bit 1 is set, the driver will show the stack trace
105 stack trace additionally. This may help the debugging. 112 additionally. This may help the debugging.
106 113
107 Since 2.6.30, this option also enables the hwptr check using 114 Since 2.6.30, this option can enable the hwptr check using
108 jiffies. This detects spontaneous invalid pointer callback 115 jiffies. This detects spontaneous invalid pointer callback
109 values, but can be lead to too much corrections for a (mostly 116 values, but can be lead to too much corrections for a (mostly
110 buggy) hardware that doesn't give smooth pointer updates. 117 buggy) hardware that doesn't give smooth pointer updates.
118 This feature is enabled via the bit 2.
111 119
112card*/pcm*/sub*/info 120card*/pcm*/sub*/info
113 The general information of this PCM sub-stream. 121 The general information of this PCM sub-stream.
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44
new file mode 100644
index 000000000000..0e41576fa13e
--- /dev/null
+++ b/Documentation/sound/alsa/README.maya44
@@ -0,0 +1,163 @@
1NOTE: The following is the original document of Rainer's patch that the
2current maya44 code based on. Some contents might be obsoleted, but I
3keep here as reference -- tiwai
4
5----------------------------------------------------------------
6
7STATE OF DEVELOPMENT:
8
9This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann.
10Development is carried out by Rainer Zimmermann (mail@lightshed.de).
11
12ESI provided a sample Maya44 card for the development work.
13
14However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing.
15
16This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008).
17
18
19The following functions work, as tested by Rainer Zimmermann and Piotr Makowski:
20
21- playback and capture at all sampling rates
22- input/output level
23- crossmixing
24- line/mic switch
25- phantom power switch
26- analogue monitor a.k.a bypass
27
28
29The following functions *should* work, but are not fully tested:
30
31- Channel 3+4 analogue - S/PDIF input switching
32- S/PDIF output
33- all inputs/outputs on the M/IO/DIO extension card
34- internal/external clock selection
35
36
37*In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.*
38
39
40Things that do not seem to work:
41
42- The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code).
43
44- Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down.
45
46
47DRIVER DETAILS:
48
49the following files were added:
50
51pci/ice1724/maya44.c - Maya44 specific code
52pci/ice1724/maya44.h
53pci/ice1724/ice1724.patch
54pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES)
55i2c/other/wm8776.c - low-level access routines for Wolfson WM8776 codecs
56include/wm8776.h
57
58
59Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure.
60This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately.
61
62
63the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree:
64
65wtm.h
66vt1720_mobo.h
67revo.h
68prodigy192.h
69pontis.h
70phase.h
71maya44.h
72juli.h
73aureon.h
74amp.h
75envy24ht.h
76se.h
77prodigy_hifi.h
78
79
80*I hope this is the correct way to do things.*
81
82
83SAMPLING RATES:
84
85The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture.
86
87As the ICE1724 chip only allows one global sampling rate, this is handled as follows:
88
89* setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels.
90
91* In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices.
92
93*AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality.
94
95
96I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic.
97
98The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712).
99
100
101SOUND DEVICES:
102
103PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0):
104
105hw:0,0 input - stereo, analog input 1+2
106hw:0,0 output - stereo, analog output 1+2
107hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input
108hw:0,1 output - stereo, analog output 3+4 (and SPDIF out)
109
110
111NAMING OF MIXER CONTROLS:
112
113(for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software).
114
115
116PCM: (digital) output level for channel 1+2
117PCM 1: same for channel 3+4
118
119Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2.
120 Make sure this is not turned on while any other source is connected to input 1/2.
121 It might damage the source and/or the maya44 card.
122
123Mic/Line input: if switch is is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo).
124
125Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver.
126Bypass 1: same for channel 3+4.
127
128Crossmix: cross-mixer from channels 1+2 to channels 3+4
129Crossmix 1: cross-mixer from channels 3+4 to channels 1+2
130
131IEC958 Output: switch for S/PDIF output.
132 This is not supported by the ESI windows driver.
133 S/PDIF should output the same signal as channel 3+4. [untested!]
134
135
136Digitial output selectors:
137
138 These switches allow a direct digital routing from the ADCs to the DACs.
139 Each switch determines where the digital input data to one of the DACs comes from.
140 They are not supported by the ESI windows driver.
141 For normal operation, they should all be set to "PCM out".
142
143H/W: Output source channel 1
144H/W 1: Output source channel 2
145H/W 2: Output source channel 3
146H/W 3: Output source channel 4
147
148H/W 4 ... H/W 9: unknown function, left in to enable testing.
149 Possibly some of these control S/PDIF output(s).
150 If these turn out to be unused, they will go away in later driver versions.
151
152Selectable values for each of the digital output selectors are:
153 "PCM out" -> DAC output of the corresponding channel (default setting)
154 "Input 1"...
155 "Input 4" -> direct routing from ADC output of the selected input channel
156
157
158--------
159
160Feb 14, 2008
161Rainer Zimmermann
162mail@lightshed.de
163
diff --git a/Documentation/sound/alsa/hda_codec.txt b/Documentation/sound/alsa/hda_codec.txt
index 34e87ec1379c..de8efbc7e4bd 100644
--- a/Documentation/sound/alsa/hda_codec.txt
+++ b/Documentation/sound/alsa/hda_codec.txt
@@ -114,7 +114,7 @@ For writing a sequence of verbs, use snd_hda_sequence_write().
114 114
115There are variants of cached read/write, snd_hda_codec_write_cache(), 115There are variants of cached read/write, snd_hda_codec_write_cache(),
116snd_hda_sequence_write_cache(). These are used for recording the 116snd_hda_sequence_write_cache(). These are used for recording the
117register states for the power-mangement resume. When no PM is needed, 117register states for the power-management resume. When no PM is needed,
118these are equivalent with non-cached version. 118these are equivalent with non-cached version.
119 119
120To retrieve the number of sub nodes connected to the given node, use 120To retrieve the number of sub nodes connected to the given node, use
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 9e6763264a2e..9ac842be9b4f 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:-
62 o Mic - Mic (and optional Jack) 62 o Mic - Mic (and optional Jack)
63 o Line - Line Input/Output (and optional Jack) 63 o Line - Line Input/Output (and optional Jack)
64 o Speaker - Speaker 64 o Speaker - Speaker
65 o Supply - Power or clock supply widget used by other widgets.
65 o Pre - Special PRE widget (exec before all others) 66 o Pre - Special PRE widget (exec before all others)
66 o Post - Special POST widget (exec after all others) 67 o Post - Special POST widget (exec after all others)
67 68
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f11ca7979fa6..322a00bb99d9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -32,6 +32,7 @@ show up in /proc/sys/kernel:
32- kstack_depth_to_print [ X86 only ] 32- kstack_depth_to_print [ X86 only ]
33- l2cr [ PPC only ] 33- l2cr [ PPC only ]
34- modprobe ==> Documentation/debugging-modules.txt 34- modprobe ==> Documentation/debugging-modules.txt
35- modules_disabled
35- msgmax 36- msgmax
36- msgmnb 37- msgmnb
37- msgmni 38- msgmni
@@ -184,6 +185,16 @@ kernel stack.
184 185
185============================================================== 186==============================================================
186 187
188modules_disabled:
189
190A toggle value indicating if modules are allowed to be loaded
191in an otherwise modular kernel. This toggle defaults to off
192(0), but can be set true (1). Once true, modules can be
193neither loaded nor unloaded, and the toggle cannot be set back
194to false.
195
196==============================================================
197
187osrelease, ostype & version: 198osrelease, ostype & version:
188 199
189# cat osrelease 200# cat osrelease
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index c302ddf629a0..6fab2dcbb4d3 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -358,7 +358,7 @@ nr_pdflush_threads
358The current number of pdflush threads. This value is read-only. 358The current number of pdflush threads. This value is read-only.
359The value changes according to the number of dirty pages in the system. 359The value changes according to the number of dirty pages in the system.
360 360
361When neccessary, additional pdflush threads are created, one per second, up to 361When necessary, additional pdflush threads are created, one per second, up to
362nr_pdflush_threads_max. 362nr_pdflush_threads_max.
363 363
364============================================================== 364==============================================================
@@ -565,7 +565,7 @@ swappiness
565 565
566This control is used to define how aggressive the kernel will swap 566This control is used to define how aggressive the kernel will swap
567memory pages. Higher values will increase agressiveness, lower values 567memory pages. Higher values will increase agressiveness, lower values
568descrease the amount of swap. 568decrease the amount of swap.
569 569
570The default value is 60. 570The default value is 60.
571 571
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
index e7c09abcfab4..04763a325520 100644
--- a/Documentation/timers/hpet.txt
+++ b/Documentation/timers/hpet.txt
@@ -7,7 +7,7 @@ by Intel and Microsoft which can be found at
7 7
8Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision") 8Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
9and up to 32 comparators. Normally three or more comparators are provided, 9and up to 32 comparators. Normally three or more comparators are provided,
10each of which can generate oneshot interupts and at least one of which has 10each of which can generate oneshot interrupts and at least one of which has
11additional hardware to support periodic interrupts. The comparators are 11additional hardware to support periodic interrupts. The comparators are
12also called "timers", which can be misleading since usually timers are 12also called "timers", which can be misleading since usually timers are
13independent of each other ... these share a counter, complicating resets. 13independent of each other ... these share a counter, complicating resets.
diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt
index 20d368c59814..9bd00fc2e823 100644
--- a/Documentation/timers/timer_stats.txt
+++ b/Documentation/timers/timer_stats.txt
@@ -62,7 +62,7 @@ Timerstats sample period: 3.888770 s
62 62
63The first column is the number of events, the second column the pid, the third 63The first column is the number of events, the second column the pid, the third
64column is the name of the process. The forth column shows the function which 64column is the name of the process. The forth column shows the function which
65initialized the timer and in parantheses the callback function which was 65initialized the timer and in parenthesis the callback function which was
66executed on expiry. 66executed on expiry.
67 67
68 Thomas, Ingo 68 Thomas, Ingo
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 000000000000..f157d7594ea7
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,90 @@
1 Event Tracing
2
3 Documentation written by Theodore Ts'o
4 Updated by Li Zefan
5
61. Introduction
7===============
8
9Tracepoints (see Documentation/trace/tracepoints.txt) can be used
10without creating custom kernel modules to register probe functions
11using the event tracing infrastructure.
12
13Not all tracepoints can be traced using the event tracing system;
14the kernel developer must provide code snippets which define how the
15tracing information is saved into the tracing buffer, and how the
16tracing information should be printed.
17
182. Using Event Tracing
19======================
20
212.1 Via the 'set_event' interface
22---------------------------------
23
24The events which are available for tracing can be found in the file
25/debug/tracing/available_events.
26
27To enable a particular event, such as 'sched_wakeup', simply echo it
28to /debug/tracing/set_event. For example:
29
30 # echo sched_wakeup >> /debug/tracing/set_event
31
32[ Note: '>>' is necessary, otherwise it will firstly disable
33 all the events. ]
34
35To disable an event, echo the event name to the set_event file prefixed
36with an exclamation point:
37
38 # echo '!sched_wakeup' >> /debug/tracing/set_event
39
40To disable all events, echo an empty line to the set_event file:
41
42 # echo > /debug/tracing/set_event
43
44To enable all events, echo '*:*' or '*:' to the set_event file:
45
46 # echo *:* > /debug/tracing/set_event
47
48The events are organized into subsystems, such as ext4, irq, sched,
49etc., and a full event name looks like this: <subsystem>:<event>. The
50subsystem name is optional, but it is displayed in the available_events
51file. All of the events in a subsystem can be specified via the syntax
52"<subsystem>:*"; for example, to enable all irq events, you can use the
53command:
54
55 # echo 'irq:*' > /debug/tracing/set_event
56
572.2 Via the 'enable' toggle
58---------------------------
59
60The events available are also listed in /debug/tracing/events/ hierarchy
61of directories.
62
63To enable event 'sched_wakeup':
64
65 # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
66
67To disable it:
68
69 # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
70
71To enable all events in sched subsystem:
72
73 # echo 1 > /debug/tracing/events/sched/enable
74
75To eanble all events:
76
77 # echo 1 > /debug/tracing/events/enable
78
79When reading one of these enable files, there are four results:
80
81 0 - all events this file affects are disabled
82 1 - all events this file affects are enabled
83 X - there is a mixture of events enabled and disabled
84 ? - this file does not affect any event
85
863. Defining an event-enabled tracepoint
87=======================================
88
89See The example provided in samples/trace_events
90
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fd9a3e693813..7bd27f0e2880 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
179 179
180 Function call tracer to trace all kernel functions. 180 Function call tracer to trace all kernel functions.
181 181
182 "function_graph_tracer" 182 "function_graph"
183 183
184 Similar to the function tracer except that the 184 Similar to the function tracer except that the
185 function tracer probes the functions on their entry 185 function tracer probes the functions on their entry
@@ -518,9 +518,18 @@ priority with zero (0) being the highest priority and the nice
518values starting at 100 (nice -20). Below is a quick chart to map 518values starting at 100 (nice -20). Below is a quick chart to map
519the kernel priority to user land priorities. 519the kernel priority to user land priorities.
520 520
521 Kernel priority: 0 to 99 ==> user RT priority 99 to 0 521 Kernel Space User Space
522 Kernel priority: 100 to 139 ==> user nice -20 to 19 522 ===============================================================
523 Kernel priority: 140 ==> idle task priority 523 0(high) to 98(low) user RT priority 99(high) to 1(low)
524 with SCHED_RR or SCHED_FIFO
525 ---------------------------------------------------------------
526 99 sched_priority is not used in scheduling
527 decisions(it must be specified as 0)
528 ---------------------------------------------------------------
529 100(high) to 139(low) user nice -20(high) to 19(low)
530 ---------------------------------------------------------------
531 140 idle task priority
532 ---------------------------------------------------------------
524 533
525The task states are: 534The task states are:
526 535
@@ -1825,4 +1834,4 @@ an error.
1825----------- 1834-----------
1826 1835
1827More details can be found in the source code, in the 1836More details can be found in the source code, in the
1828kernel/tracing/*.c files. 1837kernel/trace/*.c files.
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
index a956d9b7f943..6308735e58ca 100644
--- a/Documentation/trace/kmemtrace.txt
+++ b/Documentation/trace/kmemtrace.txt
@@ -64,7 +64,7 @@ III. Quick usage guide
64CONFIG_KMEMTRACE). 64CONFIG_KMEMTRACE).
65 65
662) Get the userspace tool and build it: 662) Get the userspace tool and build it:
67$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository 67$ git clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/ 68$ cd kmemtrace-user/
69$ ./autogen.sh 69$ ./autogen.sh
70$ ./configure 70$ ./configure
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 000000000000..cd805e16dc27
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
1The power tracer collects detailed information about C-state and P-state
2transitions, instead of just looking at the high-level "average"
3information.
4
5There is a helper script found in scrips/tracing/power.pl in the kernel
6sources which can be used to parse this information and create a
7Scalable Vector Graphics (SVG) picture from the trace data.
8
9To use this tracer:
10
11 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
12 echo power > /sys/kernel/debug/tracing/current_tracer
13 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
14 sleep 1
15 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
16 cat /sys/kernel/debug/tracing/trace | \
17 perl scripts/tracing/power.pl > out.sv
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt
index 4c3d62c7843a..c480e9c32dbd 100644
--- a/Documentation/usb/WUSB-Design-overview.txt
+++ b/Documentation/usb/WUSB-Design-overview.txt
@@ -84,7 +84,7 @@ The different logical parts of this driver are:
84 84
85 *UWB*: the Ultra-Wide-Band stack -- manages the radio and 85 *UWB*: the Ultra-Wide-Band stack -- manages the radio and
86 associated spectrum to allow for devices sharing it. Allows to 86 associated spectrum to allow for devices sharing it. Allows to
87 control bandwidth assingment, beaconing, scanning, etc 87 control bandwidth assignment, beaconing, scanning, etc
88 88
89 * 89 *
90 90
@@ -184,7 +184,7 @@ and sends the replies and notifications back to the API
184[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that 184[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
185is chartered, among other things, to keep the tab of how the UWB radio 185is chartered, among other things, to keep the tab of how the UWB radio
186neighborhood looks, creating and destroying devices as they show up or 186neighborhood looks, creating and destroying devices as they show up or
187dissapear. 187disappear.
188 188
189Command execution is very simple: a command block is sent and a event 189Command execution is very simple: a command block is sent and a event
190block or reply is expected back. For sending/receiving command/events, a 190block or reply is expected back. For sending/receiving command/events, a
@@ -333,7 +333,7 @@ read descriptors and move our data.
333 333
334*Device life cycle and keep alives* 334*Device life cycle and keep alives*
335 335
336Everytime there is a succesful transfer to/from a device, we update a 336Every time there is a successful transfer to/from a device, we update a
337per-device activity timestamp. If not, every now and then we check and 337per-device activity timestamp. If not, every now and then we check and
338if the activity timestamp gets old, we ping the device by sending it a 338if the activity timestamp gets old, we ping the device by sending it a
339Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this 339Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
@@ -411,7 +411,7 @@ context (wa_xfer) and submit it. When the xfer is done, our callback is
411called and we assign the status bits and release the xfer resources. 411called and we assign the status bits and release the xfer resources.
412 412
413In dequeue() we are basically cancelling/aborting the transfer. We issue 413In dequeue() we are basically cancelling/aborting the transfer. We issue
414a xfer abort request to the HC, cancell all the URBs we had submitted 414a xfer abort request to the HC, cancel all the URBs we had submitted
415and not yet done and when all that is done, the xfer callback will be 415and not yet done and when all that is done, the xfer callback will be
416called--this will call the URB callback. 416called--this will call the URB callback.
417 417
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt
index 6f24f566955a..fe6a99a32bbd 100644
--- a/Documentation/usb/anchors.txt
+++ b/Documentation/usb/anchors.txt
@@ -27,7 +27,7 @@ Association and disassociation of URBs with anchors
27 27
28An association of URBs to an anchor is made by an explicit 28An association of URBs to an anchor is made by an explicit
29call to usb_anchor_urb(). The association is maintained until 29call to usb_anchor_urb(). The association is maintained until
30an URB is finished by (successfull) completion. Thus disassociation 30an URB is finished by (successful) completion. Thus disassociation
31is automatic. A function is provided to forcibly finish (kill) 31is automatic. A function is provided to forcibly finish (kill)
32all URBs associated with an anchor. 32all URBs associated with an anchor.
33Furthermore, disassociation can be made with usb_unanchor_urb() 33Furthermore, disassociation can be made with usb_unanchor_urb()
@@ -76,4 +76,4 @@ usb_get_from_anchor()
76Returns the oldest anchored URB of an anchor. The URB is unanchored 76Returns the oldest anchored URB of an anchor. The URB is unanchored
77and returned with a reference. As you may mix URBs to several 77and returned with a reference. As you may mix URBs to several
78destinations in one anchor you have no guarantee the chronologically 78destinations in one anchor you have no guarantee the chronologically
79first submitted URB is returned. \ No newline at end of file 79first submitted URB is returned.
diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt
index 7c812411945b..bfb36b34b79e 100644
--- a/Documentation/usb/callbacks.txt
+++ b/Documentation/usb/callbacks.txt
@@ -65,7 +65,7 @@ Accept or decline an interface. If you accept the device return 0,
65otherwise -ENODEV or -ENXIO. Other error codes should be used only if a 65otherwise -ENODEV or -ENXIO. Other error codes should be used only if a
66genuine error occurred during initialisation which prevented a driver 66genuine error occurred during initialisation which prevented a driver
67from accepting a device that would else have been accepted. 67from accepting a device that would else have been accepted.
68You are strongly encouraged to use usbcore'sfacility, 68You are strongly encouraged to use usbcore's facility,
69usb_set_intfdata(), to associate a data structure with an interface, so 69usb_set_intfdata(), to associate a data structure with an interface, so
70that you know which internal state and identity you associate with a 70that you know which internal state and identity you associate with a
71particular interface. The device will not be suspended and you may do IO 71particular interface. The device will not be suspended and you may do IO
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
index 914cb7e734a2..4652c0f5da32 100644
--- a/Documentation/video4linux/cx18.txt
+++ b/Documentation/video4linux/cx18.txt
@@ -11,7 +11,7 @@ encoder chip:
112) Some people have problems getting the i2c bus to work. 112) Some people have problems getting the i2c bus to work.
12 The symptom is that the eeprom cannot be read and the card is 12 The symptom is that the eeprom cannot be read and the card is
13 unusable. This is probably fixed, but if you have problems 13 unusable. This is probably fixed, but if you have problems
14 then post to the video4linux or ivtv-users mailinglist. 14 then post to the video4linux or ivtv-users mailing list.
15 15
163) VBI (raw or sliced) has not yet been implemented. 163) VBI (raw or sliced) has not yet been implemented.
17 17
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index e0203662f9e9..8da3a795083f 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -50,6 +50,10 @@ Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical 50Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
51 pointer to single linked list of struct setup_data. 51 pointer to single linked list of struct setup_data.
52 52
53Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
54 beyond the kernel_alignment added, new init_size and
55 pref_address fields. Added extended boot loader IDs.
56
53**** MEMORY LAYOUT 57**** MEMORY LAYOUT
54 58
55The traditional memory map for the kernel loader, used for Image or 59The traditional memory map for the kernel loader, used for Image or
@@ -168,12 +172,13 @@ Offset Proto Name Meaning
168021C/4 2.00+ ramdisk_size initrd size (set by boot loader) 172021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
1690220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only 1730220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
1700224/2 2.01+ heap_end_ptr Free memory after setup end 1740224/2 2.01+ heap_end_ptr Free memory after setup end
1710226/2 N/A pad1 Unused 1750226/1 2.02+(3 ext_loader_ver Extended boot loader version
1760227/1 2.02+(3 ext_loader_type Extended boot loader ID
1720228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 1770228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
173022C/4 2.03+ ramdisk_max Highest legal initrd address 178022C/4 2.03+ ramdisk_max Highest legal initrd address
1740230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 1790230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
1750234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 1800234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
1760235/1 N/A pad2 Unused 1810235/1 2.10+ min_alignment Minimum alignment, as a power of two
1770236/2 N/A pad3 Unused 1820236/2 N/A pad3 Unused
1780238/4 2.06+ cmdline_size Maximum size of the kernel command line 1830238/4 2.06+ cmdline_size Maximum size of the kernel command line
179023C/4 2.07+ hardware_subarch Hardware subarchitecture 184023C/4 2.07+ hardware_subarch Hardware subarchitecture
@@ -182,6 +187,8 @@ Offset Proto Name Meaning
182024C/4 2.08+ payload_length Length of kernel payload 187024C/4 2.08+ payload_length Length of kernel payload
1830250/8 2.09+ setup_data 64-bit physical pointer to linked list 1880250/8 2.09+ setup_data 64-bit physical pointer to linked list
184 of struct setup_data 189 of struct setup_data
1900258/8 2.10+ pref_address Preferred loading address
1910260/4 2.10+ init_size Linear memory required during initialization
185 192
186(1) For backwards compatibility, if the setup_sects field contains 0, the 193(1) For backwards compatibility, if the setup_sects field contains 0, the
187 real value is 4. 194 real value is 4.
@@ -190,6 +197,8 @@ Offset Proto Name Meaning
190 field are unusable, which means the size of a bzImage kernel 197 field are unusable, which means the size of a bzImage kernel
191 cannot be determined. 198 cannot be determined.
192 199
200(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
201
193If the "HdrS" (0x53726448) magic number is not found at offset 0x202, 202If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
194the boot protocol version is "old". Loading an old kernel, the 203the boot protocol version is "old". Loading an old kernel, the
195following parameters should be assumed: 204following parameters should be assumed:
@@ -343,18 +352,32 @@ Protocol: 2.00+
343 0xTV here, where T is an identifier for the boot loader and V is 352 0xTV here, where T is an identifier for the boot loader and V is
344 a version number. Otherwise, enter 0xFF here. 353 a version number. Otherwise, enter 0xFF here.
345 354
355 For boot loader IDs above T = 0xD, write T = 0xE to this field and
356 write the extended ID minus 0x10 to the ext_loader_type field.
357 Similarly, the ext_loader_ver field can be used to provide more than
358 four bits for the bootloader version.
359
360 For example, for T = 0x15, V = 0x234, write:
361
362 type_of_loader <- 0xE4
363 ext_loader_type <- 0x05
364 ext_loader_ver <- 0x23
365
346 Assigned boot loader ids: 366 Assigned boot loader ids:
347 0 LILO (0x00 reserved for pre-2.00 bootloader) 367 0 LILO (0x00 reserved for pre-2.00 bootloader)
348 1 Loadlin 368 1 Loadlin
349 2 bootsect-loader (0x20, all other values reserved) 369 2 bootsect-loader (0x20, all other values reserved)
350 3 SYSLINUX 370 3 Syslinux
351 4 EtherBoot 371 4 Etherboot/gPXE
352 5 ELILO 372 5 ELILO
353 7 GRUB 373 7 GRUB
354 8 U-BOOT 374 8 U-Boot
355 9 Xen 375 9 Xen
356 A Gujin 376 A Gujin
357 B Qemu 377 B Qemu
378 C Arcturus Networks uCbootloader
379 E Extended (see ext_loader_type)
380 F Special (0xFF = undefined)
358 381
359 Please contact <hpa@zytor.com> if you need a bootloader ID 382 Please contact <hpa@zytor.com> if you need a bootloader ID
360 value assigned. 383 value assigned.
@@ -453,6 +476,35 @@ Protocol: 2.01+
453 Set this field to the offset (from the beginning of the real-mode 476 Set this field to the offset (from the beginning of the real-mode
454 code) of the end of the setup stack/heap, minus 0x0200. 477 code) of the end of the setup stack/heap, minus 0x0200.
455 478
479Field name: ext_loader_ver
480Type: write (optional)
481Offset/size: 0x226/1
482Protocol: 2.02+
483
484 This field is used as an extension of the version number in the
485 type_of_loader field. The total version number is considered to be
486 (type_of_loader & 0x0f) + (ext_loader_ver << 4).
487
488 The use of this field is boot loader specific. If not written, it
489 is zero.
490
491 Kernels prior to 2.6.31 did not recognize this field, but it is safe
492 to write for protocol version 2.02 or higher.
493
494Field name: ext_loader_type
495Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
496Offset/size: 0x227/1
497Protocol: 2.02+
498
499 This field is used as an extension of the type number in
500 type_of_loader field. If the type in type_of_loader is 0xE, then
501 the actual type is (ext_loader_type + 0x10).
502
503 This field is ignored if the type in type_of_loader is not 0xE.
504
505 Kernels prior to 2.6.31 did not recognize this field, but it is safe
506 to write for protocol version 2.02 or higher.
507
456Field name: cmd_line_ptr 508Field name: cmd_line_ptr
457Type: write (obligatory) 509Type: write (obligatory)
458Offset/size: 0x228/4 510Offset/size: 0x228/4
@@ -482,11 +534,19 @@ Protocol: 2.03+
482 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) 534 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
483 535
484Field name: kernel_alignment 536Field name: kernel_alignment
485Type: read (reloc) 537Type: read/modify (reloc)
486Offset/size: 0x230/4 538Offset/size: 0x230/4
487Protocol: 2.05+ 539Protocol: 2.05+ (read), 2.10+ (modify)
540
541 Alignment unit required by the kernel (if relocatable_kernel is
542 true.) A relocatable kernel that is loaded at an alignment
543 incompatible with the value in this field will be realigned during
544 kernel initialization.
488 545
489 Alignment unit required by the kernel (if relocatable_kernel is true.) 546 Starting with protocol version 2.10, this reflects the kernel
547 alignment preferred for optimal performance; it is possible for the
548 loader to modify this field to permit a lesser alignment. See the
549 min_alignment and pref_address field below.
490 550
491Field name: relocatable_kernel 551Field name: relocatable_kernel
492Type: read (reloc) 552Type: read (reloc)
@@ -498,6 +558,22 @@ Protocol: 2.05+
498 After loading, the boot loader must set the code32_start field to 558 After loading, the boot loader must set the code32_start field to
499 point to the loaded code, or to a boot loader hook. 559 point to the loaded code, or to a boot loader hook.
500 560
561Field name: min_alignment
562Type: read (reloc)
563Offset/size: 0x235/1
564Protocol: 2.10+
565
566 This field, if nonzero, indicates as a power of two the minimum
567 alignment required, as opposed to preferred, by the kernel to boot.
568 If a boot loader makes use of this field, it should update the
569 kernel_alignment field with the alignment unit desired; typically:
570
571 kernel_alignment = 1 << min_alignment
572
573 There may be a considerable performance cost with an excessively
574 misaligned kernel. Therefore, a loader should typically try each
575 power-of-two alignment from kernel_alignment down to this alignment.
576
501Field name: cmdline_size 577Field name: cmdline_size
502Type: read 578Type: read
503Offset/size: 0x238/4 579Offset/size: 0x238/4
@@ -582,6 +658,36 @@ Protocol: 2.09+
582 sure to consider the case where the linked list already contains 658 sure to consider the case where the linked list already contains
583 entries. 659 entries.
584 660
661Field name: pref_address
662Type: read (reloc)
663Offset/size: 0x258/8
664Protocol: 2.10+
665
666 This field, if nonzero, represents a preferred load address for the
667 kernel. A relocating bootloader should attempt to load at this
668 address if possible.
669
670 A non-relocatable kernel will unconditionally move itself and to run
671 at this address.
672
673Field name: init_size
674Type: read
675Offset/size: 0x25c/4
676
677 This field indicates the amount of linear contiguous memory starting
678 at the kernel runtime start address that the kernel needs before it
679 is capable of examining its memory map. This is not the same thing
680 as the total amount of memory the kernel needs to boot, but it can
681 be used by a relocating boot loader to help select a safe load
682 address for the kernel.
683
684 The kernel runtime start address is determined by the following algorithm:
685
686 if (relocatable_kernel)
687 runtime_start = align_up(load_address, kernel_alignment)
688 else
689 runtime_start = pref_address
690
585 691
586**** THE IMAGE CHECKSUM 692**** THE IMAGE CHECKSUM
587 693
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 34c13040a718..29a6ff8bc7d3 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.
5 5
6Machine check 6Machine check
7 7
8 mce=off disable machine check 8 Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
9 mce=bootlog Enable logging of machine checks left over from booting. 9
10 Disabled by default on AMD because some BIOS leave bogus ones. 10 mce=off
11 If your BIOS doesn't do that it's a good idea to enable though 11 Disable machine check
12 to make sure you log even machine check events that result 12 mce=no_cmci
13 in a reboot. On Intel systems it is enabled by default. 13 Disable CMCI(Corrected Machine Check Interrupt) that
14 Intel processor supports. Usually this disablement is
15 not recommended, but it might be handy if your hardware
16 is misbehaving.
17 Note that you'll get more problems without CMCI than with
18 due to the shared banks, i.e. you might get duplicated
19 error logs.
20 mce=dont_log_ce
21 Don't make logs for corrected errors. All events reported
22 as corrected are silently cleared by OS.
23 This option will be useful if you have no interest in any
24 of corrected errors.
25 mce=ignore_ce
26 Disable features for corrected errors, e.g. polling timer
27 and CMCI. All events reported as corrected are not cleared
28 by OS and remained in its error banks.
29 Usually this disablement is not recommended, however if
30 there is an agent checking/clearing corrected errors
31 (e.g. BIOS or hardware monitoring applications), conflicting
32 with OS's error handling, and you cannot deactivate the agent,
33 then this option will be a help.
34 mce=bootlog
35 Enable logging of machine checks left over from booting.
36 Disabled by default on AMD because some BIOS leave bogus ones.
37 If your BIOS doesn't do that it's a good idea to enable though
38 to make sure you log even machine check events that result
39 in a reboot. On Intel systems it is enabled by default.
14 mce=nobootlog 40 mce=nobootlog
15 Disable boot machine check logging. 41 Disable boot machine check logging.
16 mce=tolerancelevel (number) 42 mce=tolerancelevel[,monarchtimeout] (number,number)
43 tolerance levels:
17 0: always panic on uncorrected errors, log corrected errors 44 0: always panic on uncorrected errors, log corrected errors
18 1: panic or SIGBUS on uncorrected errors, log corrected errors 45 1: panic or SIGBUS on uncorrected errors, log corrected errors
19 2: SIGBUS or log uncorrected errors, log corrected errors 46 2: SIGBUS or log uncorrected errors, log corrected errors
20 3: never panic or SIGBUS, log all errors (for testing only) 47 3: never panic or SIGBUS, log all errors (for testing only)
21 Default is 1 48 Default is 1
22 Can be also set using sysfs which is preferable. 49 Can be also set using sysfs which is preferable.
50 monarchtimeout:
51 Sets the time in us to wait for other CPUs on machine checks. 0
52 to disable.
23 53
24 nomce (for compatibility with i386): same as mce=off 54 nomce (for compatibility with i386): same as mce=off
25 55
@@ -150,11 +180,6 @@ NUMA
150 Otherwise, the remaining system RAM is allocated to an 180 Otherwise, the remaining system RAM is allocated to an
151 additional node. 181 additional node.
152 182
153 numa=hotadd=percent
154 Only allow hotadd memory to preallocate page structures upto
155 percent of already available memory.
156 numa=hotadd=0 will disable hotadd memory.
157
158ACPI 183ACPI
159 184
160 acpi=off Don't enable ACPI 185 acpi=off Don't enable ACPI
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e7b159..b1fb30273286 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -41,7 +41,9 @@ check_interval
41 the polling interval. When the poller stops finding MCEs, it 41 the polling interval. When the poller stops finding MCEs, it
42 triggers an exponential backoff (poll less often) on the polling 42 triggers an exponential backoff (poll less often) on the polling
43 interval. The check_interval variable is both the initial and 43 interval. The check_interval variable is both the initial and
44 maximum polling interval. 44 maximum polling interval. 0 means no polling for corrected machine
45 check errors (but some corrected errors might be still reported
46 in other ways)
45 47
46tolerant 48tolerant
47 Tolerance level. When a machine check exception occurs for a non 49 Tolerance level. When a machine check exception occurs for a non
@@ -67,6 +69,10 @@ trigger
67 Program to run when a machine check event is detected. 69 Program to run when a machine check event is detected.
68 This is an alternative to running mcelog regularly from cron 70 This is an alternative to running mcelog regularly from cron
69 and allows to detect events faster. 71 and allows to detect events faster.
72monarch_timeout
73 How long to wait for the other CPUs to machine check too on a
74 exception. 0 to disable waiting for other CPUs.
75 Unit: us
70 76
71TBD document entries for AMD threshold interrupt configuration 77TBD document entries for AMD threshold interrupt configuration
72 78
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 29b52b14d0b4..d6498e3cd713 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables:
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
7hole caused by [48:63] sign extension 7hole caused by [48:63] sign extension
8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole 8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
9ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory 9ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
10ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole 10ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
11ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space 11ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
12ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) 12ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
13... unused hole ... 14... unused hole ...
14ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 15ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
15ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space 16ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space