aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/stable/sysfs-bus-xen-backend75
-rw-r--r--Documentation/ABI/stable/sysfs-devices-system-xen_memory77
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci18
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb25
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration12
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-docg334
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff2
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-multitouch9
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-isku135
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-wiimote12
-rw-r--r--Documentation/ABI/testing/sysfs-driver-wacom17
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-slab4
-rw-r--r--Documentation/DocBook/debugobjects.tmpl50
-rw-r--r--Documentation/DocBook/writing-an-alsa-driver.tmpl2
-rw-r--r--Documentation/HOWTO4
-rw-r--r--Documentation/RCU/checklist.txt6
-rw-r--r--Documentation/RCU/rcu.txt10
-rw-r--r--Documentation/RCU/stallwarn.txt16
-rw-r--r--Documentation/RCU/torture.txt13
-rw-r--r--Documentation/RCU/trace.txt4
-rw-r--r--Documentation/RCU/whatisRCU.txt19
-rw-r--r--Documentation/arm/memory.txt11
-rw-r--r--Documentation/atomic_ops.txt87
-rw-r--r--Documentation/cgroups/cgroups.txt51
-rw-r--r--Documentation/cgroups/memory.txt37
-rw-r--r--Documentation/cgroups/net_prio.txt53
-rw-r--r--Documentation/cpu-freq/governors.txt4
-rw-r--r--Documentation/development-process/5.Posting8
-rw-r--r--Documentation/devices.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/gic.txt4
-rw-r--r--Documentation/devicetree/bindings/arm/insignal-boards.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/samsung-boards.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/vic.txt29
-rw-r--r--Documentation/devicetree/bindings/c6x/clocks.txt40
-rw-r--r--Documentation/devicetree/bindings/c6x/dscr.txt127
-rw-r--r--Documentation/devicetree/bindings/c6x/emifa.txt62
-rw-r--r--Documentation/devicetree/bindings/c6x/interrupt.txt104
-rw-r--r--Documentation/devicetree/bindings/c6x/soc.txt28
-rw-r--r--Documentation/devicetree/bindings/c6x/timer64.txt26
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl330.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-samsung.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-designware.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/trivial-devices.txt58
-rw-r--r--Documentation/devicetree/bindings/input/samsung-keypad.txt88
-rw-r--r--Documentation/devicetree/bindings/input/tegra-kbc.txt18
-rw-r--r--Documentation/devicetree/bindings/mtd/gpio-control-nand.txt44
-rw-r--r--Documentation/devicetree/bindings/net/calxeda-xgmac.txt15
-rw-r--r--Documentation/devicetree/bindings/net/can/cc770.txt53
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt25
-rw-r--r--Documentation/devicetree/bindings/nvec/nvec_nvidia.txt9
-rw-r--r--Documentation/devicetree/bindings/power_supply/olpc_battery.txt5
-rw-r--r--Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt23
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt163
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/srio.txt103
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.txt29
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.txt54
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.txt20
-rw-r--r--Documentation/devicetree/bindings/rtc/twl-rtc.txt12
-rw-r--r--Documentation/devicetree/bindings/serial/omap_serial.txt10
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.txt14
-rw-r--r--Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt71
-rw-r--r--Documentation/devicetree/bindings/sound/tegra20-das.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/tegra20-i2s.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/wm8903.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/wm8994.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/tegra-usb.txt13
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt5
-rw-r--r--Documentation/digsig.txt96
-rw-r--r--Documentation/dma-buf-sharing.txt224
-rw-r--r--Documentation/dontdiff1
-rw-r--r--Documentation/driver-model/devres.txt1
-rw-r--r--Documentation/feature-removal-schedule.txt49
-rw-r--r--Documentation/filesystems/Locking8
-rw-r--r--Documentation/filesystems/configfs/configfs.txt2
-rw-r--r--Documentation/filesystems/debugfs.txt56
-rw-r--r--Documentation/filesystems/ext4.txt7
-rw-r--r--Documentation/filesystems/proc.txt42
-rw-r--r--Documentation/filesystems/sysfs.txt2
-rw-r--r--Documentation/filesystems/vfs.txt8
-rw-r--r--Documentation/hwmon/pmbus5
-rw-r--r--Documentation/hwmon/zl610015
-rw-r--r--Documentation/input/alps.txt188
-rw-r--r--Documentation/input/gpio-tilt.txt103
-rw-r--r--Documentation/input/sentelic.txt364
-rw-r--r--Documentation/kdump/kdump.txt35
-rw-r--r--Documentation/kernel-parameters.txt74
-rw-r--r--Documentation/lockdep-design.txt63
-rw-r--r--Documentation/md.txt22
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/batman-adv.txt7
-rw-r--r--Documentation/networking/bonding.txt17
-rw-r--r--Documentation/networking/ieee802154.txt27
-rw-r--r--Documentation/networking/ifenslave.c2
-rw-r--r--Documentation/networking/ip-sysctl.txt13
-rw-r--r--Documentation/networking/openvswitch.txt195
-rw-r--r--Documentation/networking/packet_mmap.txt2
-rw-r--r--Documentation/networking/scaling.txt8
-rw-r--r--Documentation/networking/stmmac.txt16
-rw-r--r--Documentation/networking/team.txt2
-rw-r--r--Documentation/pinctrl.txt258
-rw-r--r--Documentation/power/charger-manager.txt163
-rw-r--r--Documentation/power/devices.txt37
-rw-r--r--Documentation/power/freezing-of-tasks.txt39
-rw-r--r--Documentation/power/regulator/regulator.txt2
-rw-r--r--Documentation/power/runtime_pm.txt130
-rw-r--r--Documentation/s390/Debugging390.txt34
-rw-r--r--Documentation/scsi/53c700.txt21
-rw-r--r--Documentation/security/00-INDEX2
-rw-r--r--Documentation/security/LSM.txt34
-rw-r--r--Documentation/security/credentials.txt6
-rw-r--r--Documentation/serial/driver2
-rw-r--r--Documentation/sound/alsa/HD-Audio-Models.txt15
-rw-r--r--Documentation/sound/alsa/compress_offload.txt188
-rw-r--r--Documentation/sysctl/kernel.txt22
-rw-r--r--Documentation/trace/events-kmem.txt12
-rw-r--r--Documentation/trace/events.txt2
-rw-r--r--Documentation/trace/postprocess/trace-pagealloc-postprocess.pl20
-rw-r--r--Documentation/trace/tracepoint-analysis.txt40
-rw-r--r--Documentation/usb/usbmon.txt14
-rw-r--r--Documentation/vgaarbiter.txt2
-rw-r--r--Documentation/virtual/kvm/api.txt25
-rw-r--r--Documentation/virtual/lguest/.gitignore1
-rw-r--r--Documentation/virtual/lguest/Makefile8
-rw-r--r--Documentation/virtual/lguest/extract58
-rw-r--r--Documentation/virtual/lguest/lguest.c2065
-rw-r--r--Documentation/virtual/lguest/lguest.txt129
-rw-r--r--Documentation/vm/slub.txt7
-rw-r--r--Documentation/watchdog/00-INDEX2
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.txt19
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.txt10
132 files changed, 4554 insertions, 2745 deletions
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
new file mode 100644
index 000000000000..3d5951c8bf5f
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -0,0 +1,75 @@
1What: /sys/bus/xen-backend/devices/*/devtype
2Date: Feb 2009
3KernelVersion: 2.6.38
4Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
5Description:
6 The type of the device. e.g., one of: 'vbd' (block),
7 'vif' (network), or 'vfb' (framebuffer).
8
9What: /sys/bus/xen-backend/devices/*/nodename
10Date: Feb 2009
11KernelVersion: 2.6.38
12Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
13Description:
14 XenStore node (under /local/domain/NNN/) for this
15 backend device.
16
17What: /sys/bus/xen-backend/devices/vbd-*/physical_device
18Date: April 2011
19KernelVersion: 3.0
20Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
21Description:
22 The major:minor number (in hexadecimal) of the
23 physical device providing the storage for this backend
24 block device.
25
26What: /sys/bus/xen-backend/devices/vbd-*/mode
27Date: April 2011
28KernelVersion: 3.0
29Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
30Description:
31 Whether the block device is read-only ('r') or
32 read-write ('w').
33
34What: /sys/bus/xen-backend/devices/vbd-*/statistics/f_req
35Date: April 2011
36KernelVersion: 3.0
37Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
38Description:
39 Number of flush requests from the frontend.
40
41What: /sys/bus/xen-backend/devices/vbd-*/statistics/oo_req
42Date: April 2011
43KernelVersion: 3.0
44Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
45Description:
46 Number of requests delayed because the backend was too
47 busy processing previous requests.
48
49What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_req
50Date: April 2011
51KernelVersion: 3.0
52Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
53Description:
54 Number of read requests from the frontend.
55
56What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_sect
57Date: April 2011
58KernelVersion: 3.0
59Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
60Description:
61 Number of sectors read by the frontend.
62
63What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_req
64Date: April 2011
65KernelVersion: 3.0
66Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
67Description:
68 Number of write requests from the frontend.
69
70What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_sect
71Date: April 2011
72KernelVersion: 3.0
73Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
74Description:
75 Number of sectors written by the frontend.
diff --git a/Documentation/ABI/stable/sysfs-devices-system-xen_memory b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
new file mode 100644
index 000000000000..caa311d59ac1
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
@@ -0,0 +1,77 @@
1What: /sys/devices/system/xen_memory/xen_memory0/max_retry_count
2Date: May 2011
3KernelVersion: 2.6.39
4Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
5Description:
6 The maximum number of times the balloon driver will
7 attempt to increase the balloon before giving up. See
8 also 'retry_count' below.
9 A value of zero means retry forever and is the default one.
10
11What: /sys/devices/system/xen_memory/xen_memory0/max_schedule_delay
12Date: May 2011
13KernelVersion: 2.6.39
14Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
15Description:
16 The limit that 'schedule_delay' (see below) will be
17 increased to. The default value is 32 seconds.
18
19What: /sys/devices/system/xen_memory/xen_memory0/retry_count
20Date: May 2011
21KernelVersion: 2.6.39
22Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
23Description:
24 The current number of times that the balloon driver
25 has attempted to increase the size of the balloon.
26 The default value is one. With max_retry_count being
27 zero (unlimited), this means that the driver will attempt
28 to retry with a 'schedule_delay' delay.
29
30What: /sys/devices/system/xen_memory/xen_memory0/schedule_delay
31Date: May 2011
32KernelVersion: 2.6.39
33Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
34Description:
35 The time (in seconds) to wait between attempts to
36 increase the balloon. Each time the balloon cannot be
37 increased, 'schedule_delay' is increased (until
38 'max_schedule_delay' is reached at which point it
39 will use the max value).
40
41What: /sys/devices/system/xen_memory/xen_memory0/target
42Date: April 2008
43KernelVersion: 2.6.26
44Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
45Description:
46 The target number of pages to adjust this domain's
47 memory reservation to.
48
49What: /sys/devices/system/xen_memory/xen_memory0/target_kb
50Date: April 2008
51KernelVersion: 2.6.26
52Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
53Description:
54 As target above, except the value is in KiB.
55
56What: /sys/devices/system/xen_memory/xen_memory0/info/current_kb
57Date: April 2008
58KernelVersion: 2.6.26
59Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
60Description:
61 Current size (in KiB) of this domain's memory
62 reservation.
63
64What: /sys/devices/system/xen_memory/xen_memory0/info/high_kb
65Date: April 2008
66KernelVersion: 2.6.26
67Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
68Description:
69 Amount (in KiB) of high memory in the balloon.
70
71What: /sys/devices/system/xen_memory/xen_memory0/info/low_kb
72Date: April 2008
73KernelVersion: 2.6.26
74Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
75Description:
76 Amount (in KiB) of low (or normal) memory in the
77 balloon.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
66 re-discover previously removed devices. 66 re-discover previously removed devices.
67 Depends on CONFIG_HOTPLUG. 67 Depends on CONFIG_HOTPLUG.
68 68
69What: /sys/bus/pci/devices/.../msi_irqs/
70Date: September, 2011
71Contact: Neil Horman <nhorman@tuxdriver.com>
72Description:
73 The /sys/devices/.../msi_irqs directory contains a variable set
74 of sub-directories, with each sub-directory being named after a
75 corresponding msi irq vector allocated to that device. Each
76 numbered sub-directory N contains attributes of that irq.
77 Note that this directory is not created for device drivers which
78 do not support msi irqs.
79
80What: /sys/bus/pci/devices/.../msi_irqs/<N>/mode
81Date: September 2011
82Contact: Neil Horman <nhorman@tuxdriver.com>
83Description:
84 This attribute indicates the mode that the irq vector named by
85 the parent directory is in (msi vs. msix)
86
69What: /sys/bus/pci/devices/.../remove 87What: /sys/bus/pci/devices/.../remove
70Date: January 2009 88Date: January 2009
71Contact: Linux PCI developers <linux-pci@vger.kernel.org> 89Contact: Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index e647378e9e88..b4f548792e32 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -119,6 +119,31 @@ Description:
119 Write a 1 to force the device to disconnect 119 Write a 1 to force the device to disconnect
120 (equivalent to unplugging a wired USB device). 120 (equivalent to unplugging a wired USB device).
121 121
122What: /sys/bus/usb/drivers/.../new_id
123Date: October 2011
124Contact: linux-usb@vger.kernel.org
125Description:
126 Writing a device ID to this file will attempt to
127 dynamically add a new device ID to a USB device driver.
128 This may allow the driver to support more hardware than
129 was included in the driver's static device ID support
130 table at compile time. The format for the device ID is:
131 idVendor idProduct bInterfaceClass.
132 The vendor ID and product ID fields are required; the
133 interface class is optional.
134 Upon successfully adding an ID, the driver will probe
135 for the device and attempt to bind to it. For example:
136 # echo "8086 10f5" > /sys/bus/usb/drivers/foo/new_id
137
138What: /sys/bus/usb-serial/drivers/.../new_id
139Date: October 2011
140Contact: linux-usb@vger.kernel.org
141Description:
142 For serial USB drivers, this attribute appears under the
143 extra bus folder "usb-serial" in sysfs; apart from that
144 difference, all descriptions from the entry
145 "/sys/bus/usb/drivers/.../new_id" apply.
146
122What: /sys/bus/usb/drivers/.../remove_id 147What: /sys/bus/usb/drivers/.../remove_id
123Date: November 2009 148Date: November 2009
124Contact: CHENG Renquan <rqcheng@smu.edu.sg> 149Contact: CHENG Renquan <rqcheng@smu.edu.sg>
diff --git a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
new file mode 100644
index 000000000000..4cf1e72222d9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
@@ -0,0 +1,12 @@
1What: Attribute for calibrating ST-Ericsson AB8500 Real Time Clock
2Date: Oct 2011
3KernelVersion: 3.0
4Contact: Mark Godfrey <mark.godfrey@stericsson.com>
5Description: The rtc_calibration attribute allows the userspace to
6 calibrate the AB8500's 32KHz Real Time Clock.
7 Every 60 seconds the AB8500 will correct the RTC's value
8 by adding to it the value of this attribute.
9 The range of the attribute is -127 to +127 in units of
10 30.5 micro-seconds (half-parts-per-million of the 32KHz clock)
11Users: The /vendor/st-ericsson/base_utilities/core/rtc_calibration
12 daemon uses this interface.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-docg3 b/Documentation/ABI/testing/sysfs-devices-platform-docg3
new file mode 100644
index 000000000000..8aa36716882f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-docg3
@@ -0,0 +1,34 @@
1What: /sys/devices/platform/docg3/f[0-3]_dps[01]_is_keylocked
2Date: November 2011
3KernelVersion: 3.3
4Contact: Robert Jarzmik <robert.jarzmik@free.fr>
5Description:
6 Show whether the floor (0 to 3), protection area (0 or 1) is
7 keylocked. Each docg3 chip (or floor) has 2 protection areas,
8 which can cover any part of it, block aligned, called DPS.
9 The protection has information embedded whether it blocks reads,
10 writes or both.
11 The result is:
12 0 -> the DPS is not keylocked
13 1 -> the DPS is keylocked
14Users: None identified so far.
15
16What: /sys/devices/platform/docg3/f[0-3]_dps[01]_protection_key
17Date: November 2011
18KernelVersion: 3.3
19Contact: Robert Jarzmik <robert.jarzmik@free.fr>
20Description:
21 Enter the protection key for the floor (0 to 3), protection area
22 (0 or 1). Each docg3 chip (or floor) has 2 protection areas,
23 which can cover any part of it, block aligned, called DPS.
24 The protection has information embedded whether it blocks reads,
25 writes or both.
26 The protection key is a string of 8 bytes (value 0-255).
27 Entering the correct value toggles the lock, and can be observed
28 through f[0-3]_dps[01]_is_keylocked.
29 Possible values are:
30 - 8 bytes
31 Typical values are:
32 - "00000000"
33 - "12345678"
34Users: None identified so far.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
index 9aec8ef228b0..167d9032b970 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
+++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
@@ -1,7 +1,7 @@
1What: /sys/module/hid_logitech/drivers/hid:logitech/<dev>/range. 1What: /sys/module/hid_logitech/drivers/hid:logitech/<dev>/range.
2Date: July 2011 2Date: July 2011
3KernelVersion: 3.2 3KernelVersion: 3.2
4Contact: Michal Malý <madcatxster@gmail.com> 4Contact: Michal Malý <madcatxster@gmail.com>
5Description: Display minimum, maximum and current range of the steering 5Description: Display minimum, maximum and current range of the steering
6 wheel. Writing a value within min and max boundaries sets the 6 wheel. Writing a value within min and max boundaries sets the
7 range of the wheel. 7 range of the wheel.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-multitouch b/Documentation/ABI/testing/sysfs-driver-hid-multitouch
new file mode 100644
index 000000000000..f79839d1af37
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-multitouch
@@ -0,0 +1,9 @@
1What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/quirks
2Date: November 2011
3Contact: Benjamin Tissoires <benjamin.tissoires@gmail.com>
4Description: The integer value of this attribute corresponds to the
5 quirks actually in place to handle the device's protocol.
6 When read, this attribute returns the current settings (see
7 MT_QUIRKS_* in hid-multitouch.c).
8 When written, this attribute changes the quirks on the fly, and thus
9 the protocol used to handle the device.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku
new file mode 100644
index 000000000000..189dc43891bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku
@@ -0,0 +1,135 @@
1What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/actual_profile
2Date: June 2011
3Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
4Description: The integer value of this attribute ranges from 0-4.
5 When read, this attribute returns the number of the actual
6 profile. This value is persistent, so it's equivalent to the
7 profile that's active when the device is powered on next time.
8 When written, this file sets the number of the startup profile
9 and the device activates this profile immediately.
10Users: http://roccat.sourceforge.net
11
12What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/info
13Date: June 2011
14Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
15Description: When read, this file returns general data like firmware version.
16 The data is 6 bytes long.
17 This file is readonly.
18Users: http://roccat.sourceforge.net
19
20What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/key_mask
21Date: June 2011
22Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
23Description: When written, this file lets one deactivate certain keys like
24 windows and application keys, to prevent accidental presses.
25 Profile number for which these settings apply is included in
26 written data. The data has to be 6 bytes long.
27 Before reading this file, control has to be written to select
28 which profile to read.
29Users: http://roccat.sourceforge.net
30
31What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_capslock
32Date: June 2011
33Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
34Description: When written, this file lets one set the function of the
35 capslock key for a specific profile. Profile number is included
36 in written data. The data has to be 6 bytes long.
37 Before reading this file, control has to be written to select
38 which profile to read.
39Users: http://roccat.sourceforge.net
40
41What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_easyzone
42Date: June 2011
43Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
44Description: When written, this file lets one set the function of the
45 easyzone keys for a specific profile. Profile number is included
46 in written data. The data has to be 65 bytes long.
47 Before reading this file, control has to be written to select
48 which profile to read.
49Users: http://roccat.sourceforge.net
50
51What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_function
52Date: June 2011
53Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
54Description: When written, this file lets one set the function of the
55 function keys for a specific profile. Profile number is included
56 in written data. The data has to be 41 bytes long.
57 Before reading this file, control has to be written to select
58 which profile to read.
59Users: http://roccat.sourceforge.net
60
61What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_macro
62Date: June 2011
63Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
64Description: When written, this file lets one set the function of the macro
65 keys for a specific profile. Profile number is included in
66 written data. The data has to be 35 bytes long.
67 Before reading this file, control has to be written to select
68 which profile to read.
69Users: http://roccat.sourceforge.net
70
71What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_media
72Date: June 2011
73Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
74Description: When written, this file lets one set the function of the media
75 keys for a specific profile. Profile number is included in
76 written data. The data has to be 29 bytes long.
77 Before reading this file, control has to be written to select
78 which profile to read.
79Users: http://roccat.sourceforge.net
80
81What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_thumbster
82Date: June 2011
83Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
84Description: When written, this file lets one set the function of the
85 thumbster keys for a specific profile. Profile number is included
86 in written data. The data has to be 23 bytes long.
87 Before reading this file, control has to be written to select
88 which profile to read.
89Users: http://roccat.sourceforge.net
90
91What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/last_set
92Date: June 2011
93Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
94Description: When written, this file lets one set the time in secs since
95 epoch in which the last configuration took place.
96 The data has to be 20 bytes long.
97Users: http://roccat.sourceforge.net
98
99What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/light
100Date: June 2011
101Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
102Description: When written, this file lets one set the backlight intensity for
103 a specific profile. Profile number is included in written data.
104 The data has to be 10 bytes long.
105 Before reading this file, control has to be written to select
106 which profile to read.
107Users: http://roccat.sourceforge.net
108
109What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/macro
110Date: June 2011
111Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
112Description: When written, this file lets one store macros with max 500
113 keystrokes for a specific button for a specific profile.
114 Button and profile numbers are included in written data.
115 The data has to be 2083 bytes long.
116 Before reading this file, control has to be written to select
117 which profile and key to read.
118Users: http://roccat.sourceforge.net
119
120What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/control
121Date: June 2011
122Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
123Description: When written, this file lets one select which data from which
124 profile will be read next. The data has to be 3 bytes long.
125 This file is writeonly.
126Users: http://roccat.sourceforge.net
127
128What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/talk
129Date: June 2011
130Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
131Description: When written, this file lets one trigger easyshift functionality
132 from the host.
133 The data has to be 16 bytes long.
134 This file is writeonly.
135Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
index 5d5a16ea57c6..3d98009f447a 100644
--- a/Documentation/ABI/testing/sysfs-driver-hid-wiimote
+++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
@@ -8,3 +8,15 @@ Contact: David Herrmann <dh.herrmann@googlemail.com>
8Description: Make it possible to set/get current led state. Reading from it 8Description: Make it possible to set/get current led state. Reading from it
9 returns 0 if led is off and 1 if it is on. Writing 0 to it 9 returns 0 if led is off and 1 if it is on. Writing 0 to it
10 disables the led, writing 1 enables it. 10 disables the led, writing 1 enables it.
11
12What: /sys/bus/hid/drivers/wiimote/<dev>/extension
13Date: August 2011
14KernelVersion: 3.2
15Contact: David Herrmann <dh.herrmann@googlemail.com>
16Description: This file contains the currently connected and initialized
17 extensions. It can be one of: none, motionp, nunchuck, classic,
18 motionp+nunchuck, motionp+classic
19 motionp is the official Nintendo Motion+ extension, nunchuck is
20 the official Nintendo Nunchuck extension and classic is the
21 Nintendo Classic Controller extension. The motionp extension can
22 be combined with the other two.
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom
index 82d4df136444..0130d6683c14 100644
--- a/Documentation/ABI/testing/sysfs-driver-wacom
+++ b/Documentation/ABI/testing/sysfs-driver-wacom
@@ -15,9 +15,9 @@ Contact: linux-input@vger.kernel.org
15Description: 15Description:
16 Attribute group for control of the status LEDs and the OLEDs. 16 Attribute group for control of the status LEDs and the OLEDs.
17 This attribute group is only available for Intuos 4 M, L, 17 This attribute group is only available for Intuos 4 M, L,
18 and XL (with LEDs and OLEDs) and Cintiq 21UX2 (LEDs only). 18 and XL (with LEDs and OLEDs) and Cintiq 21UX2 and Cintiq 24HD
19 Therefore its presence implicitly signifies the presence of 19 (LEDs only). Therefore its presence implicitly signifies the
20 said LEDs and OLEDs on the tablet device. 20 presence of said LEDs and OLEDs on the tablet device.
21 21
22What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status0_luminance 22What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status0_luminance
23Date: August 2011 23Date: August 2011
@@ -41,16 +41,17 @@ Date: August 2011
41Contact: linux-input@vger.kernel.org 41Contact: linux-input@vger.kernel.org
42Description: 42Description:
43 Writing to this file sets which one of the four (for Intuos 4) 43 Writing to this file sets which one of the four (for Intuos 4)
44 or of the right four (for Cintiq 21UX2) status LEDs is active (0..3). 44 or of the right four (for Cintiq 21UX2 and Cintiq 24HD) status
45 The other three LEDs on the same side are always inactive. 45 LEDs is active (0..3). The other three LEDs on the same side are
46 always inactive.
46 47
47What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led1_select 48What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led1_select
48Date: September 2011 49Date: September 2011
49Contact: linux-input@vger.kernel.org 50Contact: linux-input@vger.kernel.org
50Description: 51Description:
51 Writing to this file sets which one of the left four (for Cintiq 21UX2) 52 Writing to this file sets which one of the left four (for Cintiq 21UX2
52 status LEDs is active (0..3). The other three LEDs on the left are always 53 and Cintiq 24HD) status LEDs is active (0..3). The other three LEDs on
53 inactive. 54 the left are always inactive.
54 55
55What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/buttons_luminance 56What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/buttons_luminance
56Date: August 2011 57Date: August 2011
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 8b093f8222d3..91bd6ca5440f 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -346,6 +346,10 @@ Description:
346 number of objects per slab. If a slab cannot be allocated 346 number of objects per slab. If a slab cannot be allocated
347 because of fragmentation, SLUB will retry with the minimum order 347 because of fragmentation, SLUB will retry with the minimum order
348 possible depending on its characteristics. 348 possible depending on its characteristics.
349 When debug_guardpage_minorder=N (N > 0) parameter is specified
350 (see Documentation/kernel-parameters.txt), the minimum possible
351 order is used and this sysfs entry can not be used to change
352 the order at run time.
349 353
350What: /sys/kernel/slab/cache/order_fallback 354What: /sys/kernel/slab/cache/order_fallback
351Date: April 2008 355Date: April 2008
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 08ff908aa7a2..24979f691e3e 100644
--- a/Documentation/DocBook/debugobjects.tmpl
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -96,6 +96,7 @@
96 <listitem><para>debug_object_deactivate</para></listitem> 96 <listitem><para>debug_object_deactivate</para></listitem>
97 <listitem><para>debug_object_destroy</para></listitem> 97 <listitem><para>debug_object_destroy</para></listitem>
98 <listitem><para>debug_object_free</para></listitem> 98 <listitem><para>debug_object_free</para></listitem>
99 <listitem><para>debug_object_assert_init</para></listitem>
99 </itemizedlist> 100 </itemizedlist>
100 Each of these functions takes the address of the real object and 101 Each of these functions takes the address of the real object and
101 a pointer to the object type specific debug description 102 a pointer to the object type specific debug description
@@ -273,6 +274,26 @@
273 debug checks. 274 debug checks.
274 </para> 275 </para>
275 </sect1> 276 </sect1>
277
278 <sect1 id="debug_object_assert_init">
279 <title>debug_object_assert_init</title>
280 <para>
281 This function is called to assert that an object has been
282 initialized.
283 </para>
284 <para>
285 When the real object is not tracked by debugobjects, it calls
286 fixup_assert_init of the object type description structure
287 provided by the caller, with the hardcoded object state
288 ODEBUG_STATE_NOTAVAILABLE. The fixup function can correct the problem
289 by calling debug_object_init and other specific initializing
290 functions.
291 </para>
292 <para>
293 When the real object is already tracked by debugobjects it is
294 ignored.
295 </para>
296 </sect1>
276 </chapter> 297 </chapter>
277 <chapter id="fixupfunctions"> 298 <chapter id="fixupfunctions">
278 <title>Fixup functions</title> 299 <title>Fixup functions</title>
@@ -381,6 +402,35 @@
381 statistics. 402 statistics.
382 </para> 403 </para>
383 </sect1> 404 </sect1>
405 <sect1 id="fixup_assert_init">
406 <title>fixup_assert_init</title>
407 <para>
408 This function is called from the debug code whenever a problem
409 in debug_object_assert_init is detected.
410 </para>
411 <para>
412 Called from debug_object_assert_init() with a hardcoded state
413 ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
414 debug bucket.
415 </para>
416 <para>
417 The function returns 1 when the fixup was successful,
418 otherwise 0. The return value is used to update the
419 statistics.
420 </para>
421 <para>
422 Note, this function should make sure debug_object_init() is
423 called before returning.
424 </para>
425 <para>
426 The handling of statically initialized objects is a special
427 case. The fixup function should check if this is a legitimate
428 case of a statically initialized object or not. In this case only
429 debug_object_init() should be called to make the object known to
430 the tracker. Then the function should return 0 because this is not
431 a real fixup.
432 </para>
433 </sect1>
384 </chapter> 434 </chapter>
385 <chapter id="bugs"> 435 <chapter id="bugs">
386 <title>Known Bugs And Assumptions</title> 436 <title>Known Bugs And Assumptions</title>
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
index 5de23c007078..cab4ec58e46e 100644
--- a/Documentation/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -404,7 +404,7 @@
404 /* SNDRV_CARDS: maximum number of cards supported by this module */ 404 /* SNDRV_CARDS: maximum number of cards supported by this module */
405 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; 405 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
406 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; 406 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
407 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; 407 static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
408 408
409 /* definition of the chip-specific record */ 409 /* definition of the chip-specific record */
410 struct mychip { 410 struct mychip {
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 81bc1a9ab9d8..f7ade3b3b40d 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -275,8 +275,8 @@ versions.
275If no 2.6.x.y kernel is available, then the highest numbered 2.6.x 275If no 2.6.x.y kernel is available, then the highest numbered 2.6.x
276kernel is the current stable kernel. 276kernel is the current stable kernel.
277 277
2782.6.x.y are maintained by the "stable" team <stable@kernel.org>, and are 2782.6.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and
279released as needs dictate. The normal release period is approximately 279are released as needs dictate. The normal release period is approximately
280two weeks, but it can be longer if there are no pressing problems. A 280two weeks, but it can be longer if there are no pressing problems. A
281security-related problem, instead, can cause a release to happen almost 281security-related problem, instead, can cause a release to happen almost
282instantly. 282instantly.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 0c134f8afc6f..bff2d8be1e18 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -328,6 +328,12 @@ over a rather long period of time, but improvements are always welcome!
328 RCU rather than SRCU, because RCU is almost always faster and 328 RCU rather than SRCU, because RCU is almost always faster and
329 easier to use than is SRCU. 329 easier to use than is SRCU.
330 330
331 If you need to enter your read-side critical section in a
332 hardirq or exception handler, and then exit that same read-side
333 critical section in the task that was interrupted, then you need
334 to use srcu_read_lock_raw() and srcu_read_unlock_raw(), which avoid
335 the lockdep checking that would otherwise make this practice illegal.
336
331 Also unlike other forms of RCU, explicit initialization 337 Also unlike other forms of RCU, explicit initialization
332 and cleanup is required via init_srcu_struct() and 338 and cleanup is required via init_srcu_struct() and
333 cleanup_srcu_struct(). These are passed a "struct srcu_struct" 339 cleanup_srcu_struct(). These are passed a "struct srcu_struct"
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 31852705b586..bf778332a28f 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -38,11 +38,11 @@ o How can the updater tell when a grace period has completed
38 38
39 Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the 39 Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the
40 same effect, but require that the readers manipulate CPU-local 40 same effect, but require that the readers manipulate CPU-local
41 counters. These counters allow limited types of blocking 41 counters. These counters allow limited types of blocking within
42 within RCU read-side critical sections. SRCU also uses 42 RCU read-side critical sections. SRCU also uses CPU-local
43 CPU-local counters, and permits general blocking within 43 counters, and permits general blocking within RCU read-side
44 RCU read-side critical sections. These two variants of 44 critical sections. These variants of RCU detect grace periods
45 RCU detect grace periods by sampling these counters. 45 by sampling these counters.
46 46
47o If I am running on a uniprocessor kernel, which can only do one 47o If I am running on a uniprocessor kernel, which can only do one
48 thing at a time, why should I wait for a grace period? 48 thing at a time, why should I wait for a grace period?
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 4e959208f736..083d88cbc089 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -101,6 +101,11 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
101 CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning 101 CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning
102 messages. 102 messages.
103 103
104o A hardware or software issue shuts off the scheduler-clock
105 interrupt on a CPU that is not in dyntick-idle mode. This
106 problem really has happened, and seems to be most likely to
107 result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels.
108
104o A bug in the RCU implementation. 109o A bug in the RCU implementation.
105 110
106o A hardware failure. This is quite unlikely, but has occurred 111o A hardware failure. This is quite unlikely, but has occurred
@@ -109,12 +114,11 @@ o A hardware failure. This is quite unlikely, but has occurred
109 This resulted in a series of RCU CPU stall warnings, eventually 114 This resulted in a series of RCU CPU stall warnings, eventually
110 leading to the realization that the CPU had failed. 115 leading to the realization that the CPU had failed.
111 116
112The RCU, RCU-sched, and RCU-bh implementations have CPU stall 117The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning.
113warning. SRCU does not have its own CPU stall warnings, but its 118SRCU does not have its own CPU stall warnings, but its calls to
114calls to synchronize_sched() will result in RCU-sched detecting 119synchronize_sched() will result in RCU-sched detecting RCU-sched-related
115RCU-sched-related CPU stalls. Please note that RCU only detects 120CPU stalls. Please note that RCU only detects CPU stalls when there is
116CPU stalls when there is a grace period in progress. No grace period, 121a grace period in progress. No grace period, no CPU stall warnings.
117no CPU stall warnings.
118 122
119To diagnose the cause of the stall, inspect the stack traces. 123To diagnose the cause of the stall, inspect the stack traces.
120The offending function will usually be near the top of the stack. 124The offending function will usually be near the top of the stack.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 783d6c134d3f..d67068d0d2b9 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -61,11 +61,24 @@ nreaders This is the number of RCU reading threads supported.
61 To properly exercise RCU implementations with preemptible 61 To properly exercise RCU implementations with preemptible
62 read-side critical sections. 62 read-side critical sections.
63 63
64onoff_interval
65 The number of seconds between each attempt to execute a
66 randomly selected CPU-hotplug operation. Defaults to
67 zero, which disables CPU hotplugging. In HOTPLUG_CPU=n
68 kernels, rcutorture will silently refuse to do any
69 CPU-hotplug operations regardless of what value is
70 specified for onoff_interval.
71
64shuffle_interval 72shuffle_interval
65 The number of seconds to keep the test threads affinitied 73 The number of seconds to keep the test threads affinitied
66 to a particular subset of the CPUs, defaults to 3 seconds. 74 to a particular subset of the CPUs, defaults to 3 seconds.
67 Used in conjunction with test_no_idle_hz. 75 Used in conjunction with test_no_idle_hz.
68 76
77shutdown_secs The number of seconds to run the test before terminating
78 the test and powering off the system. The default is
79 zero, which disables test termination and system shutdown.
80 This capability is useful for automated testing.
81
69stat_interval The number of seconds between output of torture 82stat_interval The number of seconds between output of torture
70 statistics (via printk()). Regardless of the interval, 83 statistics (via printk()). Regardless of the interval,
71 statistics are printed when the module is unloaded. 84 statistics are printed when the module is unloaded.
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index aaf65f6c6cd7..49587abfc2f7 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented
105 or one greater than the interrupt-nesting depth otherwise. 105 or one greater than the interrupt-nesting depth otherwise.
106 The number after the second "/" is the NMI nesting depth. 106 The number after the second "/" is the NMI nesting depth.
107 107
108 This field is displayed only for CONFIG_NO_HZ kernels.
109
110o "df" is the number of times that some other CPU has forced a 108o "df" is the number of times that some other CPU has forced a
111 quiescent state on behalf of this CPU due to this CPU being in 109 quiescent state on behalf of this CPU due to this CPU being in
112 dynticks-idle state. 110 dynticks-idle state.
113 111
114 This field is displayed only for CONFIG_NO_HZ kernels.
115
116o "of" is the number of times that some other CPU has forced a 112o "of" is the number of times that some other CPU has forced a
117 quiescent state on behalf of this CPU due to this CPU being 113 quiescent state on behalf of this CPU due to this CPU being
118 offline. In a perfect world, this might never happen, but it 114 offline. In a perfect world, this might never happen, but it
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6ef692667e2f..6bbe8dcdc3da 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -4,6 +4,7 @@ to start learning about RCU:
41. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ 41. What is RCU, Fundamentally? http://lwn.net/Articles/262464/
52. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ 52. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/
63. RCU part 3: the RCU API http://lwn.net/Articles/264090/ 63. RCU part 3: the RCU API http://lwn.net/Articles/264090/
74. The RCU API, 2010 Edition http://lwn.net/Articles/418853/
7 8
8 9
9What is RCU? 10What is RCU?
@@ -834,6 +835,8 @@ SRCU: Critical sections Grace period Barrier
834 835
835 srcu_read_lock synchronize_srcu N/A 836 srcu_read_lock synchronize_srcu N/A
836 srcu_read_unlock synchronize_srcu_expedited 837 srcu_read_unlock synchronize_srcu_expedited
838 srcu_read_lock_raw
839 srcu_read_unlock_raw
837 srcu_dereference 840 srcu_dereference
838 841
839SRCU: Initialization/cleanup 842SRCU: Initialization/cleanup
@@ -855,27 +858,33 @@ list can be helpful:
855 858
856a. Will readers need to block? If so, you need SRCU. 859a. Will readers need to block? If so, you need SRCU.
857 860
858b. What about the -rt patchset? If readers would need to block 861b. Is it necessary to start a read-side critical section in a
862 hardirq handler or exception handler, and then to complete
863 this read-side critical section in the task that was
864 interrupted? If so, you need SRCU's srcu_read_lock_raw() and
865 srcu_read_unlock_raw() primitives.
866
867c. What about the -rt patchset? If readers would need to block
859 in a non-rt kernel, you need SRCU. If readers would block 868 in a non-rt kernel, you need SRCU. If readers would block
860 in a -rt kernel, but not in a non-rt kernel, SRCU is not 869 in a -rt kernel, but not in a non-rt kernel, SRCU is not
861 necessary. 870 necessary.
862 871
863c. Do you need to treat NMI handlers, hardirq handlers, 872d. Do you need to treat NMI handlers, hardirq handlers,
864 and code segments with preemption disabled (whether 873 and code segments with preemption disabled (whether
865 via preempt_disable(), local_irq_save(), local_bh_disable(), 874 via preempt_disable(), local_irq_save(), local_bh_disable(),
866 or some other mechanism) as if they were explicit RCU readers? 875 or some other mechanism) as if they were explicit RCU readers?
867 If so, you need RCU-sched. 876 If so, you need RCU-sched.
868 877
869d. Do you need RCU grace periods to complete even in the face 878e. Do you need RCU grace periods to complete even in the face
870 of softirq monopolization of one or more of the CPUs? For 879 of softirq monopolization of one or more of the CPUs? For
871 example, is your code subject to network-based denial-of-service 880 example, is your code subject to network-based denial-of-service
872 attacks? If so, you need RCU-bh. 881 attacks? If so, you need RCU-bh.
873 882
874e. Is your workload too update-intensive for normal use of 883f. Is your workload too update-intensive for normal use of
875 RCU, but inappropriate for other synchronization mechanisms? 884 RCU, but inappropriate for other synchronization mechanisms?
876 If so, consider SLAB_DESTROY_BY_RCU. But please be careful! 885 If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
877 886
878f. Otherwise, use RCU. 887g. Otherwise, use RCU.
879 888
880Of course, this all assumes that you have determined that RCU is in fact 889Of course, this all assumes that you have determined that RCU is in fact
881the right tool for your job. 890the right tool for your job.
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 771d48d3b335..208a2d465b92 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -51,15 +51,14 @@ ffc00000 ffefffff DMA memory mapping region. Memory returned
51ff000000 ffbfffff Reserved for future expansion of DMA 51ff000000 ffbfffff Reserved for future expansion of DMA
52 mapping region. 52 mapping region.
53 53
54VMALLOC_END feffffff Free for platform use, recommended.
55 VMALLOC_END must be aligned to a 2MB
56 boundary.
57
58VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space. 54VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space.
59 Memory returned by vmalloc/ioremap will 55 Memory returned by vmalloc/ioremap will
60 be dynamically placed in this region. 56 be dynamically placed in this region.
61 VMALLOC_START may be based upon the value 57 Machine specific static mappings are also
62 of the high_memory variable. 58 located here through iotable_init().
59 VMALLOC_START is based upon the value
60 of the high_memory variable, and VMALLOC_END
61 is equal to 0xff000000.
63 62
64PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region. 63PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region.
65 This maps the platforms RAM, and typically 64 This maps the platforms RAM, and typically
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 3bd585b44927..27f2b21a9d5c 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -84,6 +84,93 @@ compiler optimizes the section accessing atomic_t variables.
84 84
85*** YOU HAVE BEEN WARNED! *** 85*** YOU HAVE BEEN WARNED! ***
86 86
87Properly aligned pointers, longs, ints, and chars (and unsigned
88equivalents) may be atomically loaded from and stored to in the same
89sense as described for atomic_read() and atomic_set(). The ACCESS_ONCE()
90macro should be used to prevent the compiler from using optimizations
91that might otherwise optimize accesses out of existence on the one hand,
92or that might create unsolicited accesses on the other.
93
94For example consider the following code:
95
96 while (a > 0)
97 do_something();
98
99If the compiler can prove that do_something() does not store to the
100variable a, then the compiler is within its rights transforming this to
101the following:
102
103 tmp = a;
104 if (tmp > 0)
105 for (;;)
106 do_something();
107
108If you don't want the compiler to do this (and you probably don't), then
109you should use something like the following:
110
111 while (ACCESS_ONCE(a) > 0)
112 do_something();
113
114Alternatively, you could place a barrier() call in the loop.
115
116For another example, consider the following code:
117
118 tmp_a = a;
119 do_something_with(tmp_a);
120 do_something_else_with(tmp_a);
121
122If the compiler can prove that do_something_with() does not store to the
123variable a, then the compiler is within its rights to manufacture an
124additional load as follows:
125
126 tmp_a = a;
127 do_something_with(tmp_a);
128 tmp_a = a;
129 do_something_else_with(tmp_a);
130
131This could fatally confuse your code if it expected the same value
132to be passed to do_something_with() and do_something_else_with().
133
134The compiler would be likely to manufacture this additional load if
135do_something_with() was an inline function that made very heavy use
136of registers: reloading from variable a could save a flush to the
137stack and later reload. To prevent the compiler from attacking your
138code in this manner, write the following:
139
140 tmp_a = ACCESS_ONCE(a);
141 do_something_with(tmp_a);
142 do_something_else_with(tmp_a);
143
144For a final example, consider the following code, assuming that the
145variable a is set at boot time before the second CPU is brought online
146and never changed later, so that memory barriers are not needed:
147
148 if (a)
149 b = 9;
150 else
151 b = 42;
152
153The compiler is within its rights to manufacture an additional store
154by transforming the above code into the following:
155
156 b = 42;
157 if (a)
158 b = 9;
159
160This could come as a fatal surprise to other code running concurrently
161that expected b to never have the value 42 if a was non-zero. To prevent
162the compiler from doing this, write something like:
163
164 if (a)
165 ACCESS_ONCE(b) = 9;
166 else
167 ACCESS_ONCE(b) = 42;
168
169Don't even -think- about doing this without proper use of memory barriers,
170locks, or atomic operations if variable a can change at runtime!
171
172*** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! ***
173
87Now, we move onto the atomic operation interfaces typically implemented with 174Now, we move onto the atomic operation interfaces typically implemented with
88the help of assembly code. 175the help of assembly code.
89 176
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 9c452ef2328c..a7c96ae5557c 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -594,53 +594,44 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
594called multiple times against a cgroup. 594called multiple times against a cgroup.
595 595
596int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 596int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
597 struct task_struct *task) 597 struct cgroup_taskset *tset)
598(cgroup_mutex held by caller) 598(cgroup_mutex held by caller)
599 599
600Called prior to moving a task into a cgroup; if the subsystem 600Called prior to moving one or more tasks into a cgroup; if the
601returns an error, this will abort the attach operation. If a NULL 601subsystem returns an error, this will abort the attach operation.
602task is passed, then a successful result indicates that *any* 602@tset contains the tasks to be attached and is guaranteed to have at
603unspecified task can be moved into the cgroup. Note that this isn't 603least one task in it.
604called on a fork. If this method returns 0 (success) then this should 604
605remain valid while the caller holds cgroup_mutex and it is ensured that either 605If there are multiple tasks in the taskset, then:
606 - it's guaranteed that all are from the same thread group
607 - @tset contains all tasks from the thread group whether or not
608 they're switching cgroups
609 - the first task is the leader
610
611Each @tset entry also contains the task's old cgroup and tasks which
612aren't switching cgroup can be skipped easily using the
613cgroup_taskset_for_each() iterator. Note that this isn't called on a
614fork. If this method returns 0 (success) then this should remain valid
615while the caller holds cgroup_mutex and it is ensured that either
606attach() or cancel_attach() will be called in future. 616attach() or cancel_attach() will be called in future.
607 617
608int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
609(cgroup_mutex held by caller)
610
611As can_attach, but for operations that must be run once per task to be
612attached (possibly many when using cgroup_attach_proc). Called after
613can_attach.
614
615void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 618void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
616 struct task_struct *task, bool threadgroup) 619 struct cgroup_taskset *tset)
617(cgroup_mutex held by caller) 620(cgroup_mutex held by caller)
618 621
619Called when a task attach operation has failed after can_attach() has succeeded. 622Called when a task attach operation has failed after can_attach() has succeeded.
620A subsystem whose can_attach() has some side-effects should provide this 623A subsystem whose can_attach() has some side-effects should provide this
621function, so that the subsystem can implement a rollback. If not, not necessary. 624function, so that the subsystem can implement a rollback. If not, not necessary.
622This will be called only about subsystems whose can_attach() operation have 625This will be called only about subsystems whose can_attach() operation have
623succeeded. 626succeeded. The parameters are identical to can_attach().
624
625void pre_attach(struct cgroup *cgrp);
626(cgroup_mutex held by caller)
627
628For any non-per-thread attachment work that needs to happen before
629attach_task. Needed by cpuset.
630 627
631void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 628void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
632 struct cgroup *old_cgrp, struct task_struct *task) 629 struct cgroup_taskset *tset)
633(cgroup_mutex held by caller) 630(cgroup_mutex held by caller)
634 631
635Called after the task has been attached to the cgroup, to allow any 632Called after the task has been attached to the cgroup, to allow any
636post-attachment activity that requires memory allocations or blocking. 633post-attachment activity that requires memory allocations or blocking.
637 634The parameters are identical to can_attach().
638void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
639(cgroup_mutex held by caller)
640
641As attach, but for operations that must be run once per task to be attached,
642like can_attach_task. Called before attach. Currently does not support any
643subsystem that might need the old_cgrp for every thread in the group.
644 635
645void fork(struct cgroup_subsys *ss, struct task_struct *task) 636void fork(struct cgroup_subsys *ss, struct task_struct *task)
646 637
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index cc0ebc5241b3..4c95c0034a4b 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -44,8 +44,8 @@ Features:
44 - oom-killer disable knob and oom-notifier 44 - oom-killer disable knob and oom-notifier
45 - Root cgroup has no limit controls. 45 - Root cgroup has no limit controls.
46 46
47 Kernel memory and Hugepages are not under control yet. We just manage 47 Kernel memory support is work in progress, and the current version provides
48 pages on LRU. To add more controls, we have to take care of performance. 48 basic functionality. (See Section 2.7)
49 49
50Brief summary of control files. 50Brief summary of control files.
51 51
@@ -61,7 +61,7 @@ Brief summary of control files.
61 memory.failcnt # show the number of memory usage hits limits 61 memory.failcnt # show the number of memory usage hits limits
62 memory.memsw.failcnt # show the number of memory+Swap hits limits 62 memory.memsw.failcnt # show the number of memory+Swap hits limits
63 memory.max_usage_in_bytes # show max memory usage recorded 63 memory.max_usage_in_bytes # show max memory usage recorded
64 memory.memsw.usage_in_bytes # show max memory+Swap usage recorded 64 memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
65 memory.soft_limit_in_bytes # set/show soft limit of memory usage 65 memory.soft_limit_in_bytes # set/show soft limit of memory usage
66 memory.stat # show various statistics 66 memory.stat # show various statistics
67 memory.use_hierarchy # set/show hierarchical account enabled 67 memory.use_hierarchy # set/show hierarchical account enabled
@@ -72,6 +72,9 @@ Brief summary of control files.
72 memory.oom_control # set/show oom controls. 72 memory.oom_control # set/show oom controls.
73 memory.numa_stat # show the number of memory usage per numa node 73 memory.numa_stat # show the number of memory usage per numa node
74 74
75 memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
76 memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
77
751. History 781. History
76 79
77The memory controller has a long history. A request for comments for the memory 80The memory controller has a long history. A request for comments for the memory
@@ -255,6 +258,27 @@ When oom event notifier is registered, event will be delivered.
255 per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by 258 per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
256 zone->lru_lock, it has no lock of its own. 259 zone->lru_lock, it has no lock of its own.
257 260
2612.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
262
263With the Kernel memory extension, the Memory Controller is able to limit
264the amount of kernel memory used by the system. Kernel memory is fundamentally
265different than user memory, since it can't be swapped out, which makes it
266possible to DoS the system by consuming too much of this precious resource.
267
268Kernel memory limits are not imposed for the root cgroup. Usage for the root
269cgroup may or may not be accounted.
270
271Currently no soft limit is implemented for kernel memory. It is future work
272to trigger slab reclaim when those limits are reached.
273
2742.7.1 Current Kernel Memory resources accounted
275
276* sockets memory pressure: some sockets protocols have memory pressure
277thresholds. The Memory Controller allows them to be controlled individually
278per cgroup, instead of globally.
279
280* tcp memory pressure: sockets memory pressure for the tcp protocol.
281
2583. User Interface 2823. User Interface
259 283
2600. Configuration 2840. Configuration
@@ -386,8 +410,11 @@ memory.stat file includes following statistics
386cache - # of bytes of page cache memory. 410cache - # of bytes of page cache memory.
387rss - # of bytes of anonymous and swap cache memory. 411rss - # of bytes of anonymous and swap cache memory.
388mapped_file - # of bytes of mapped file (includes tmpfs/shmem) 412mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
389pgpgin - # of pages paged in (equivalent to # of charging events). 413pgpgin - # of charging events to the memory cgroup. The charging
390pgpgout - # of pages paged out (equivalent to # of uncharging events). 414 event happens each time a page is accounted as either mapped
415 anon page(RSS) or cache page(Page Cache) to the cgroup.
416pgpgout - # of uncharging events to the memory cgroup. The uncharging
417 event happens each time a page is unaccounted from the cgroup.
391swap - # of bytes of swap usage 418swap - # of bytes of swap usage
392inactive_anon - # of bytes of anonymous memory and swap cache memory on 419inactive_anon - # of bytes of anonymous memory and swap cache memory on
393 LRU list. 420 LRU list.
diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroups/net_prio.txt
new file mode 100644
index 000000000000..01b322635591
--- /dev/null
+++ b/Documentation/cgroups/net_prio.txt
@@ -0,0 +1,53 @@
1Network priority cgroup
2-------------------------
3
4The Network priority cgroup provides an interface to allow an administrator to
5dynamically set the priority of network traffic generated by various
6applications.
7
8Nominally, an application would set the priority of its traffic via the
9SO_PRIORITY socket option. This, however, is not always possible because:
10
111) The application may not have been coded to set this value
122) The priority of application traffic is often a site-specific administrative
13 decision rather than an application defined one.
14
15This cgroup allows an administrator to assign a process to a group which defines
16the priority of egress traffic on a given interface. Network priority groups can
17be created by first mounting the cgroup filesystem.
18
19# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
20
21With the above step, the initial group acting as the parent accounting group
22becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in
23the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
24
25Each net_prio cgroup contains two files that are subsystem specific
26
27net_prio.prioidx
28This file is read-only, and is simply informative. It contains a unique integer
29value that the kernel uses as an internal representation of this cgroup.
30
31net_prio.ifpriomap
32This file contains a map of the priorities assigned to traffic originating from
33processes in this group and egressing the system on various interfaces. It
34contains a list of tuples in the form <ifname priority>. Contents of this file
35can be modified by echoing a string into the file using the same tuple format.
36For example:
37
38echo "eth0 5" > /sys/fs/cgroup/net_prio/iscsi/net_prio.ifpriomap
39
40This command would force any traffic originating from processes belonging to the
41iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
42said traffic set to the value 5. The parent accounting group also has a
43writeable 'net_prio.ifpriomap' file that can be used to set a system default
44priority.
45
46Priorities are set immediately prior to queueing a frame to the device
47queueing discipline (qdisc) so priorities will be assigned prior to the hardware
48queue selection being made.
49
50One usage for the net_prio cgroup is with mqprio qdisc allowing application
51traffic to be steered to hardware/driver based traffic classes. These mappings
52can then be managed by administrators or other networking protocols such as
53DCBX.
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index d221781dabaa..c7a2eb8450c2 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -127,7 +127,7 @@ in the bash (as said, 1000 is default), do:
127echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ 127echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
128 >ondemand/sampling_rate 128 >ondemand/sampling_rate
129 129
130show_sampling_rate_min: 130sampling_rate_min:
131The sampling rate is limited by the HW transition latency: 131The sampling rate is limited by the HW transition latency:
132transition_latency * 100 132transition_latency * 100
133Or by kernel restrictions: 133Or by kernel restrictions:
@@ -140,8 +140,6 @@ HZ=100: min=200000us (200ms)
140The highest value of kernel and HW latency restrictions is shown and 140The highest value of kernel and HW latency restrictions is shown and
141used as the minimum sampling rate. 141used as the minimum sampling rate.
142 142
143show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT.
144
145up_threshold: defines what the average CPU usage between the samplings 143up_threshold: defines what the average CPU usage between the samplings
146of 'sampling_rate' needs to be for the kernel to make a decision on 144of 'sampling_rate' needs to be for the kernel to make a decision on
147whether it should increase the frequency. For example when it is set 145whether it should increase the frequency. For example when it is set
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index 903a2546f138..8a48c9b62864 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -271,10 +271,10 @@ copies should go to:
271 the linux-kernel list. 271 the linux-kernel list.
272 272
273 - If you are fixing a bug, think about whether the fix should go into the 273 - If you are fixing a bug, think about whether the fix should go into the
274 next stable update. If so, stable@kernel.org should get a copy of the 274 next stable update. If so, stable@vger.kernel.org should get a copy of
275 patch. Also add a "Cc: stable@kernel.org" to the tags within the patch 275 the patch. Also add a "Cc: stable@vger.kernel.org" to the tags within
276 itself; that will cause the stable team to get a notification when your 276 the patch itself; that will cause the stable team to get a notification
277 fix goes into the mainline. 277 when your fix goes into the mainline.
278 278
279When selecting recipients for a patch, it is good to have an idea of who 279When selecting recipients for a patch, it is good to have an idea of who
280you think will eventually accept the patch and get it merged. While it 280you think will eventually accept the patch and get it merged. While it
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index eccffe715229..cec8864ce4e8 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -379,7 +379,7 @@ Your cooperation is appreciated.
379 162 = /dev/smbus System Management Bus 379 162 = /dev/smbus System Management Bus
380 163 = /dev/lik Logitech Internet Keyboard 380 163 = /dev/lik Logitech Internet Keyboard
381 164 = /dev/ipmo Intel Intelligent Platform Management 381 164 = /dev/ipmo Intel Intelligent Platform Management
382 165 = /dev/vmmon VMWare virtual machine monitor 382 165 = /dev/vmmon VMware virtual machine monitor
383 166 = /dev/i2o/ctl I2O configuration manager 383 166 = /dev/i2o/ctl I2O configuration manager
384 167 = /dev/specialix_sxctl Specialix serial control 384 167 = /dev/specialix_sxctl Specialix serial control
385 168 = /dev/tcldrv Technology Concepts serial control 385 168 = /dev/tcldrv Technology Concepts serial control
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt
index c9848ad0e2e3..54bdddadf1cf 100644
--- a/Documentation/devicetree/bindings/arm/fsl.txt
+++ b/Documentation/devicetree/bindings/arm/fsl.txt
@@ -21,6 +21,10 @@ i.MX53 Smart Mobile Reference Design Board
21Required root node properties: 21Required root node properties:
22 - compatible = "fsl,imx53-smd", "fsl,imx53"; 22 - compatible = "fsl,imx53-smd", "fsl,imx53";
23 23
24i.MX6 Quad SABRE Automotive Board 24i.MX6 Quad Armadillo2 Board
25Required root node properties: 25Required root node properties:
26 - compatible = "fsl,imx6q-sabreauto", "fsl,imx6q"; 26 - compatible = "fsl,imx6q-arm2", "fsl,imx6q";
27
28i.MX6 Quad SABRE Lite Board
29Required root node properties:
30 - compatible = "fsl,imx6q-sabrelite", "fsl,imx6q";
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
index 52916b4aa1fe..9b4b82a721b6 100644
--- a/Documentation/devicetree/bindings/arm/gic.txt
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -42,6 +42,10 @@ Optional
42- interrupts : Interrupt source of the parent interrupt controller. Only 42- interrupts : Interrupt source of the parent interrupt controller. Only
43 present on secondary GICs. 43 present on secondary GICs.
44 44
45- cpu-offset : per-cpu offset within the distributor and cpu interface
46 regions, used when the GIC doesn't have banked registers. The offset is
47 cpu-offset * cpu-nr.
48
45Example: 49Example:
46 50
47 intc: interrupt-controller@fff11000 { 51 intc: interrupt-controller@fff11000 {
diff --git a/Documentation/devicetree/bindings/arm/insignal-boards.txt b/Documentation/devicetree/bindings/arm/insignal-boards.txt
new file mode 100644
index 000000000000..524c3dc5d808
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/insignal-boards.txt
@@ -0,0 +1,8 @@
1* Insignal's Exynos4210 based Origen evaluation board
2
3Origen low-cost evaluation board is based on Samsung's Exynos4210 SoC.
4
5Required root node properties:
6 - compatible = should be one or more of the following.
7 (a) "insignal,origen" - for Insignal's Origen eval board.
8 (b) "samsung,exynos4210" - for boards based on Exynos4210 SoC.
diff --git a/Documentation/devicetree/bindings/arm/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung-boards.txt
new file mode 100644
index 000000000000..0bf68be56fd1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung-boards.txt
@@ -0,0 +1,8 @@
1* Samsung's Exynos4210 based SMDKV310 evaluation board
2
3SMDKV310 evaluation board is based on Samsung's Exynos4210 SoC.
4
5Required root node properties:
6 - compatible = should be one or more of the following.
7 (a) "samsung,smdkv310" - for Samsung's SMDKV310 eval board.
8 (b) "samsung,exynos4210" - for boards based on Exynos4210 SoC.
diff --git a/Documentation/devicetree/bindings/arm/tegra.txt b/Documentation/devicetree/bindings/arm/tegra.txt
new file mode 100644
index 000000000000..6e69d2e5e766
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra.txt
@@ -0,0 +1,14 @@
1NVIDIA Tegra device tree bindings
2-------------------------------------------
3
4Boards with the tegra20 SoC shall have the following properties:
5
6Required root node property:
7
8compatible = "nvidia,tegra20";
9
10Boards with the tegra30 SoC shall have the following properties:
11
12Required root node property:
13
14compatible = "nvidia,tegra30";
diff --git a/Documentation/devicetree/bindings/arm/vic.txt b/Documentation/devicetree/bindings/arm/vic.txt
new file mode 100644
index 000000000000..266716b23437
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vic.txt
@@ -0,0 +1,29 @@
1* ARM Vectored Interrupt Controller
2
3One or more Vectored Interrupt Controllers (VICs) can be connected in an ARM
4system for interrupt routing. For multiple controllers they can either be
5nested or have the outputs wire-OR'd together.
6
7Required properties:
8
9- compatible : should be one of
10 "arm,pl190-vic"
11 "arm,pl192-vic"
12- interrupt-controller : Identifies the node as an interrupt controller
13- #interrupt-cells : The number of cells to define the interrupts. Must be 1 as
14 the VIC has no configuration options for interrupt sources. The cell is a u32
15 and defines the interrupt number.
16- reg : The register bank for the VIC.
17
18Optional properties:
19
20- interrupts : Interrupt source for parent controllers if the VIC is nested.
21
22Example:
23
24 vic0: interrupt-controller@60000 {
25 compatible = "arm,pl192-vic";
26 interrupt-controller;
27 #interrupt-cells = <1>;
28 reg = <0x60000 0x1000>;
29 };
diff --git a/Documentation/devicetree/bindings/c6x/clocks.txt b/Documentation/devicetree/bindings/c6x/clocks.txt
new file mode 100644
index 000000000000..a04f5fd30122
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/clocks.txt
@@ -0,0 +1,40 @@
1C6X PLL Clock Controllers
2-------------------------
3
4This is a first-cut support for the SoC clock controllers. This is still
5under development and will probably change as the common device tree
6clock support is added to the kernel.
7
8Required properties:
9
10- compatible: "ti,c64x+pll"
11 May also have SoC-specific value to support SoC-specific initialization
12 in the driver. One of:
13 "ti,c6455-pll"
14 "ti,c6457-pll"
15 "ti,c6472-pll"
16 "ti,c6474-pll"
17
18- reg: base address and size of register area
19- clock-frequency: input clock frequency in hz
20
21
22Optional properties:
23
24- ti,c64x+pll-bypass-delay: CPU cycles to delay when entering bypass mode
25
26- ti,c64x+pll-reset-delay: CPU cycles to delay after PLL reset
27
28- ti,c64x+pll-lock-delay: CPU cycles to delay after PLL frequency change
29
30Example:
31
32 clock-controller@29a0000 {
33 compatible = "ti,c6472-pll", "ti,c64x+pll";
34 reg = <0x029a0000 0x200>;
35 clock-frequency = <25000000>;
36
37 ti,c64x+pll-bypass-delay = <200>;
38 ti,c64x+pll-reset-delay = <12000>;
39 ti,c64x+pll-lock-delay = <80000>;
40 };
diff --git a/Documentation/devicetree/bindings/c6x/dscr.txt b/Documentation/devicetree/bindings/c6x/dscr.txt
new file mode 100644
index 000000000000..d847758f2b20
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/dscr.txt
@@ -0,0 +1,127 @@
1Device State Configuration Registers
2------------------------------------
3
4TI C6X SoCs contain a region of miscellaneous registers which provide various
5functions for SoC control or status. Details vary considerably from SoC
6to SoC with no two being alike.
7
8In general, the Device State Configuration Registers (DSCR) will provide one or
9more configuration registers often protected by a lock register where one or
10more key values must be written to a lock register in order to unlock the
11configuration register for writes. These configuration registers may be used to
12enable (and disable in some cases) SoC pin drivers, select peripheral clock
13sources (internal or pin), etc. In some cases, a configuration register is
14write once or the individual bits are write once. In addition to device config,
15the DSCR block may provide registers which are used to reset peripherals,
16provide device ID information, provide ethernet MAC addresses, as well as other
17miscellaneous functions.
18
19For device state control (enable/disable), each device control is assigned an
20id which is used by individual device drivers to control the state as needed.
21
22Required properties:
23
24- compatible: must be "ti,c64x+dscr"
25- reg: register area base and size
26
27Optional properties:
28
29 NOTE: These are optional in that not all SoCs will have all properties. For
30 SoCs which do support a given property, leaving the property out of the
31 device tree will result in reduced functionality or possibly driver
32 failure.
33
34- ti,dscr-devstat
35 offset of the devstat register
36
37- ti,dscr-silicon-rev
38 offset, start bit, and bitsize of silicon revision field
39
40- ti,dscr-rmii-resets
41 offset and bitmask of RMII reset field. May have multiple tuples if more
42 than one ethernet port is available.
43
44- ti,dscr-locked-regs
45 possibly multiple tuples describing registers which are write protected by
46 a lock register. Each tuple consists of the register offset, lock register
47 offset, and the key value used to unlock the register.
48
49- ti,dscr-kick-regs
50 offset and key values of two "kick" registers used to write protect other
51 registers in DSCR. On SoCs using kick registers, the first key must be
52 written to the first kick register and the second key must be written to
53 the second register before other registers in the area are write-enabled.
54
55- ti,dscr-mac-fuse-regs
56 MAC addresses are contained in two registers. Each element of a MAC address
57 is contained in a single byte. This property has two tuples. Each tuple has
58 a register offset and four cells representing bytes in the register from
59 most significant to least. The value of these four cells is the MAC byte
60 index (1-6) of the byte within the register. A value of 0 means the byte
61 is unused in the MAC address.
62
63- ti,dscr-devstate-ctl-regs
64 This property describes the bitfields used to control the state of devices.
65 Each tuple describes a range of identical bitfields used to control one or
66 more devices (one bitfield per device). The layout of each tuple is:
67
68 start_id num_ids reg enable disable start_bit nbits
69
70 Where:
71 start_id is device id for the first device control in the range
72 num_ids is the number of device controls in the range
73 reg is the offset of the register holding the control bits
74 enable is the value to enable a device
75 disable is the value to disable a device (0xffffffff if cannot disable)
76 start_bit is the bit number of the first bit in the range
77 nbits is the number of bits per device control
78
79- ti,dscr-devstate-stat-regs
80 This property describes the bitfields used to provide device state status
81 for device states controlled by the DSCR. Each tuple describes a range of
82 identical bitfields used to provide status for one or more devices (one
83 bitfield per device). The layout of each tuple is:
84
85 start_id num_ids reg enable disable start_bit nbits
86
87 Where:
88 start_id is device id for the first device status in the range
89 num_ids is the number of devices covered by the range
90 reg is the offset of the register holding the status bits
91 enable is the value indicating device is enabled
92 disable is the value indicating device is disabled
93 start_bit is the bit number of the first bit in the range
94 nbits is the number of bits per device status
95
96- ti,dscr-privperm
97 Offset and default value for register used to set access privilege for
98 some SoC devices.
99
100
101Example:
102
103 device-state-config-regs@2a80000 {
104 compatible = "ti,c64x+dscr";
105 reg = <0x02a80000 0x41000>;
106
107 ti,dscr-devstat = <0>;
108 ti,dscr-silicon-rev = <8 28 0xf>;
109 ti,dscr-rmii-resets = <0x40020 0x00040000>;
110
111 ti,dscr-locked-regs = <0x40008 0x40004 0x0f0a0b00>;
112 ti,dscr-devstate-ctl-regs =
113 <0 12 0x40008 1 0 0 2
114 12 1 0x40008 3 0 30 2
115 13 2 0x4002c 1 0xffffffff 0 1>;
116 ti,dscr-devstate-stat-regs =
117 <0 10 0x40014 1 0 0 3
118 10 2 0x40018 1 0 0 3>;
119
120 ti,dscr-mac-fuse-regs = <0x700 1 2 3 4
121 0x704 5 6 0 0>;
122
123 ti,dscr-privperm = <0x41c 0xaaaaaaaa>;
124
125 ti,dscr-kick-regs = <0x38 0x83E70B13
126 0x3c 0x95A4F1E0>;
127 };
diff --git a/Documentation/devicetree/bindings/c6x/emifa.txt b/Documentation/devicetree/bindings/c6x/emifa.txt
new file mode 100644
index 000000000000..0ff6e9b9a13f
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/emifa.txt
@@ -0,0 +1,62 @@
1External Memory Interface
2-------------------------
3
4The emifa node describes a simple external bus controller found on some C6X
5SoCs. This interface provides external busses with a number of chip selects.
6
7Required properties:
8
9- compatible: must be "ti,c64x+emifa", "simple-bus"
10- reg: register area base and size
11- #address-cells: must be 2 (chip-select + offset)
12- #size-cells: must be 1
13- ranges: mapping from EMIFA space to parent space
14
15
16Optional properties:
17
18- ti,dscr-dev-enable: Device ID if EMIF is enabled/disabled from DSCR
19
20- ti,emifa-burst-priority:
21 Number of memory transfers after which the EMIF will elevate the priority
22 of the oldest command in the command FIFO. Setting this field to 255
23 disables this feature, thereby allowing old commands to stay in the FIFO
24 indefinitely.
25
26- ti,emifa-ce-config:
27 Configuration values for each of the supported chip selects.
28
29Example:
30
31 emifa@70000000 {
32 compatible = "ti,c64x+emifa", "simple-bus";
33 #address-cells = <2>;
34 #size-cells = <1>;
35 reg = <0x70000000 0x100>;
36 ranges = <0x2 0x0 0xa0000000 0x00000008
37 0x3 0x0 0xb0000000 0x00400000
38 0x4 0x0 0xc0000000 0x10000000
39 0x5 0x0 0xD0000000 0x10000000>;
40
41 ti,dscr-dev-enable = <13>;
42 ti,emifa-burst-priority = <255>;
43 ti,emifa-ce-config = <0x00240120
44 0x00240120
45 0x00240122
46 0x00240122>;
47
48 flash@3,0 {
49 #address-cells = <1>;
50 #size-cells = <1>;
51 compatible = "cfi-flash";
52 reg = <0x3 0x0 0x400000>;
53 bank-width = <1>;
54 device-width = <1>;
55 partition@0 {
56 reg = <0x0 0x400000>;
57 label = "NOR";
58 };
59 };
60 };
61
62This shows a flash chip attached to chip select 3.
diff --git a/Documentation/devicetree/bindings/c6x/interrupt.txt b/Documentation/devicetree/bindings/c6x/interrupt.txt
new file mode 100644
index 000000000000..42bb796cc4ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/interrupt.txt
@@ -0,0 +1,104 @@
1C6X Interrupt Chips
2-------------------
3
4* C64X+ Core Interrupt Controller
5
6 The core interrupt controller provides 16 prioritized interrupts to the
7 C64X+ core. Priority 0 and 1 are used for reset and NMI respectively.
8 Priority 2 and 3 are reserved. Priority 4-15 are used for interrupt
9 sources coming from outside the core.
10
11 Required properties:
12 --------------------
13 - compatible: Should be "ti,c64x+core-pic";
14 - #interrupt-cells: <1>
15
16 Interrupt Specifier Definition
17 ------------------------------
18 Single cell specifying the core interrupt priority level (4-15) where
19 4 is highest priority and 15 is lowest priority.
20
21 Example
22 -------
23 core_pic: interrupt-controller@0 {
24 interrupt-controller;
25 #interrupt-cells = <1>;
26 compatible = "ti,c64x+core-pic";
27 };
28
29
30
31* C64x+ Megamodule Interrupt Controller
32
33 The megamodule PIC consists of four interrupt multiplexers each of which
34 combine up to 32 interrupt inputs into a single interrupt output which
35 may be cascaded into the core interrupt controller. The megamodule PIC
36 has a total of 12 outputs cascading into the core interrupt controller.
37 One for each core interrupt priority level. In addition to the combined
38 interrupt sources, individual megamodule interrupts may be cascaded to
39 the core interrupt controller. When an individual interrupt is cascaded,
40 it is no longer handled through a megamodule interrupt combiner and is
41 considered to have the core interrupt controller as the parent.
42
43 Required properties:
44 --------------------
45 - compatible: "ti,c64x+megamod-pic"
46 - interrupt-controller
47 - #interrupt-cells: <1>
48 - reg: base address and size of register area
49 - interrupt-parent: must be core interrupt controller
50 - interrupts: This should have four cells; one for each interrupt combiner.
51 The cells contain the core priority interrupt to which the
52 corresponding combiner output is wired.
53
54 Optional properties:
55 --------------------
56 - ti,c64x+megamod-pic-mux: Array of 12 cells corresponding to the 12 core
57 priority interrupts. The first cell corresponds to
58 core priority 4 and the last cell corresponds to
59 core priority 15. The value of each cell is the
60 megamodule interrupt source which is MUXed to
61 the core interrupt corresponding to the cell
62 position. Allowed values are 4 - 127. Mapping for
63 interrupts 0 - 3 (combined interrupt sources) are
64 ignored.
65
66 Interrupt Specifier Definition
67 ------------------------------
68 Single cell specifying the megamodule interrupt source (4-127). Note that
69 interrupts mapped directly to the core with "ti,c64x+megamod-pic-mux" will
70 use the core interrupt controller as their parent and the specifier will
71 be the core priority level, not the megamodule interrupt number.
72
73 Examples
74 --------
75 megamod_pic: interrupt-controller@1800000 {
76 compatible = "ti,c64x+megamod-pic";
77 interrupt-controller;
78 #interrupt-cells = <1>;
79 reg = <0x1800000 0x1000>;
80 interrupt-parent = <&core_pic>;
81 interrupts = < 12 13 14 15 >;
82 };
83
84 This is a minimal example where all individual interrupts go through a
85 combiner. Combiner-0 is mapped to core interrupt 12, combiner-1 is mapped
86 to interrupt 13, etc.
87
88
89 megamod_pic: interrupt-controller@1800000 {
90 compatible = "ti,c64x+megamod-pic";
91 interrupt-controller;
92 #interrupt-cells = <1>;
93 reg = <0x1800000 0x1000>;
94 interrupt-parent = <&core_pic>;
95 interrupts = < 12 13 14 15 >;
96 ti,c64x+megamod-pic-mux = < 0 0 0 0
97 32 0 0 0
98 0 0 0 0 >;
99 };
100
101 This is the same as the first example except that megamodule interrupt 32 is
102 mapped directly to core priority interrupt 8. The node using this interrupt
103 must set the core controller as its interrupt parent and use 8 in the
104 interrupt specifier value.
diff --git a/Documentation/devicetree/bindings/c6x/soc.txt b/Documentation/devicetree/bindings/c6x/soc.txt
new file mode 100644
index 000000000000..b1e4973b5769
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/soc.txt
@@ -0,0 +1,28 @@
1C6X System-on-Chip
2------------------
3
4Required properties:
5
6- compatible: "simple-bus"
7- #address-cells: must be 1
8- #size-cells: must be 1
9- ranges
10
11Optional properties:
12
13- model: specific SoC model
14
15- nodes for IP blocks within SoC
16
17
18Example:
19
20 soc {
21 compatible = "simple-bus";
22 model = "tms320c6455";
23 #address-cells = <1>;
24 #size-cells = <1>;
25 ranges;
26
27 ...
28 };
diff --git a/Documentation/devicetree/bindings/c6x/timer64.txt b/Documentation/devicetree/bindings/c6x/timer64.txt
new file mode 100644
index 000000000000..95911fe70224
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/timer64.txt
@@ -0,0 +1,26 @@
1Timer64
2-------
3
4The timer64 node describes C6X event timers.
5
6Required properties:
7
8- compatible: must be "ti,c64x+timer64"
9- reg: base address and size of register region
10- interrupt-parent: interrupt controller
11- interrupts: interrupt id
12
13Optional properties:
14
15- ti,dscr-dev-enable: Device ID used to enable timer IP through DSCR interface.
16
17- ti,core-mask: on multi-core SoCs, bitmask of cores allowed to use this timer.
18
19Example:
20 timer0: timer@25e0000 {
21 compatible = "ti,c64x+timer64";
22 ti,core-mask = < 0x01 >;
23 reg = <0x25e0000 0x40>;
24 interrupt-parent = <&megamod_pic>;
25 interrupts = < 16 >;
26 };
diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt
new file mode 100644
index 000000000000..a4cd273b2a67
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt
@@ -0,0 +1,30 @@
1* ARM PrimeCell PL330 DMA Controller
2
3The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
4between memory and peripherals or memory to memory.
5
6Required properties:
7 - compatible: should include both "arm,pl330" and "arm,primecell".
8 - reg: physical base address of the controller and length of memory mapped
9 region.
10 - interrupts: interrupt number to the cpu.
11
12Example:
13
14 pdma0: pdma@12680000 {
15 compatible = "arm,pl330", "arm,primecell";
16 reg = <0x12680000 0x1000>;
17 interrupts = <99>;
18 };
19
20Client drivers (device nodes requiring dma transfers from dev-to-mem or
21mem-to-dev) should specify the DMA channel numbers using a two-value pair
22as shown below.
23
24 [property name] = <[phandle of the dma controller] [dma request id]>;
25
26 where 'dma request id' is the dma request number which is connected
27 to the client controller. The 'property name' is recommended to be
28 of the form <name>-dma-channel.
29
30 Example: tx-dma-channel = <&pdma0 12>;
diff --git a/Documentation/devicetree/bindings/gpio/gpio-samsung.txt b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt
new file mode 100644
index 000000000000..8f50fe5e6c42
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt
@@ -0,0 +1,40 @@
1Samsung Exynos4 GPIO Controller
2
3Required properties:
4- compatible: Compatible property value should be "samsung,exynos4-gpio".
5
6- reg: Physical base address of the controller and length of memory mapped
7 region.
8
9- #gpio-cells: Should be 4. The syntax of the gpio specifier used by client nodes
10 should be the following with values derived from the SoC user manual.
11 <[phandle of the gpio controller node]
12 [pin number within the gpio controller]
13 [mux function]
14 [pull up/down]
15 [drive strength]>
16
17 Values for gpio specifier:
18 - Pin number: is a value between 0 and 7.
19 - Pull Up/Down: 0 - Pull Up/Down Disabled.
20 1 - Pull Down Enabled.
21 3 - Pull Up Enabled.
22 - Drive Strength: 0 - 1x,
23 1 - 3x,
24 2 - 2x,
25 3 - 4x
26
27- gpio-controller: Specifies that the node is a gpio controller.
28- #address-cells: should be 1.
29- #size-cells: should be 1.
30
31Example:
32
33 gpa0: gpio-controller@11400000 {
34 #address-cells = <1>;
35 #size-cells = <1>;
36 compatible = "samsung,exynos4-gpio";
37 reg = <0x11400000 0x20>;
38 #gpio-cells = <4>;
39 gpio-controller;
40 };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
new file mode 100644
index 000000000000..e42a2ee233e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
@@ -0,0 +1,22 @@
1* Synopsys DesignWare I2C
2
3Required properties :
4
5 - compatible : should be "snps,designware-i2c"
6 - reg : Offset and length of the register set for the device
7 - interrupts : <IRQ> where IRQ is the interrupt number.
8
9Recommended properties :
10
11 - clock-frequency : desired I2C bus clock frequency in Hz.
12
13Example :
14
15 i2c@f0000 {
16 #address-cells = <1>;
17 #size-cells = <0>;
18 compatible = "snps,designware-i2c";
19 reg = <0xf0000 0x1000>;
20 interrupts = <11>;
21 clock-frequency = <400000>;
22 };
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
new file mode 100644
index 000000000000..1a85f986961b
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -0,0 +1,58 @@
1This is a list of trivial i2c devices that have simple device tree
2bindings, consisting only of a compatible field, an address and
3possibly an interrupt line.
4
5If a device needs more specific bindings, such as properties to
6describe some aspect of it, there needs to be a specific binding
7document for it just like any other devices.
8
9
10Compatible Vendor / Chip
11========== =============
12ad,ad7414 SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin
13ad,adm9240 ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
14adi,adt7461 +/-1C TDM Extended Temp Range I.C
15adt7461 +/-1C TDM Extended Temp Range I.C
16at,24c08 i2c serial eeprom (24cxx)
17atmel,24c02 i2c serial eeprom (24cxx)
18catalyst,24c32 i2c serial eeprom
19dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock
20dallas,ds1338 I2C RTC with 56-Byte NV RAM
21dallas,ds1339 I2C Serial Real-Time Clock
22dallas,ds1340 I2C RTC with Trickle Charger
23dallas,ds1374 I2C, 32-Bit Binary Counter Watchdog RTC with Trickle Charger and Reset Input/Output
24dallas,ds1631 High-Precision Digital Thermometer
25dallas,ds1682 Total-Elapsed-Time Recorder with Alarm
26dallas,ds1775 Tiny Digital Thermometer and Thermostat
27dallas,ds3232 Extremely Accurate I²C RTC with Integrated Crystal and SRAM
28dallas,ds4510 CPU Supervisor with Nonvolatile Memory and Programmable I/O
29dallas,ds75 Digital Thermometer and Thermostat
30dialog,da9053 DA9053: flexible system level PMIC with multicore support
31epson,rx8025 High-Stability. I2C-Bus INTERFACE REAL TIME CLOCK MODULE
32epson,rx8581 I2C-BUS INTERFACE REAL TIME CLOCK MODULE
33fsl,mag3110 MAG3110: Xtrinsic High Accuracy, 3D Magnetometer
34fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51
35fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer
36fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller
37fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec
38maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator
39maxim,max1237 Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs
40maxim,max6625 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface
41mc,rv3029c2 Real Time Clock Module with I2C-Bus
42national,lm75 I2C TEMP SENSOR
43national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
44national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
45nxp,pca9556 Octal SMBus and I2C registered interface
46nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset
47nxp,pcf8563 Real-time clock/calendar
48ovti,ov5642 OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus
49pericom,pt7c4338 Real-time Clock Module
50plx,pex8648 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch
51ramtron,24c64 i2c serial eeprom (24cxx)
52ricoh,rs5c372a I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
53samsung,24ad0xd1 S524AD0XF1 (128K/256K-bit Serial EEPROM for Low Power)
54st-micro,24c256 i2c serial eeprom (24cxx)
55stm,m41t00 Serial Access TIMEKEEPER
56stm,m41t62 Serial real-time clock (RTC) with alarm
57stm,m41t80 M41T80 - SERIAL ACCESS RTC WITH ALARMS
58ti,tsc2003 I2C Touch-Screen Controller
diff --git a/Documentation/devicetree/bindings/input/samsung-keypad.txt b/Documentation/devicetree/bindings/input/samsung-keypad.txt
new file mode 100644
index 000000000000..ce3e394c0e64
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/samsung-keypad.txt
@@ -0,0 +1,88 @@
1* Samsung's Keypad Controller device tree bindings
2
3Samsung's Keypad controller is used to interface a SoC with a matrix-type
4keypad device. The keypad controller supports multiple row and column lines.
5A key can be placed at each intersection of a unique row and a unique column.
6The keypad controller can sense a key-press and key-release and report the
7event using an interrupt to the cpu.
8
9Required SoC Specific Properties:
10- compatible: should be one of the following
11 - "samsung,s3c6410-keypad": For controllers compatible with s3c6410 keypad
12 controller.
13 - "samsung,s5pv210-keypad": For controllers compatible with s5pv210 keypad
14 controller.
15
16- reg: physical base address of the controller and length of memory mapped
17 region.
18
19- interrupts: The interrupt number to the cpu.
20
21Required Board Specific Properties:
22- samsung,keypad-num-rows: Number of row lines connected to the keypad
23 controller.
24
25- samsung,keypad-num-columns: Number of column lines connected to the
26 keypad controller.
27
28- row-gpios: List of gpios used as row lines. The gpio specifier for
29 this property depends on the gpio controller to which these row lines
30 are connected.
31
32- col-gpios: List of gpios used as column lines. The gpio specifier for
33 this property depends on the gpio controller to which these column
34 lines are connected.
35
36- Keys represented as child nodes: Each key connected to the keypad
37 controller is represented as a child node to the keypad controller
38 device node and should include the following properties.
39 - keypad,row: the row number to which the key is connected.
40 - keypad,column: the column number to which the key is connected.
41 - linux,code: the key-code to be reported when the key is pressed
42 and released.
43
44Optional Properties specific to linux:
45- linux,keypad-no-autorepeat: do not enable autorepeat feature.
46- linux,keypad-wakeup: use any event on keypad as wakeup event.
47
48
49Example:
50 keypad@100A0000 {
51 compatible = "samsung,s5pv210-keypad";
52 reg = <0x100A0000 0x100>;
53 interrupts = <173>;
54 samsung,keypad-num-rows = <2>;
55 samsung,keypad-num-columns = <8>;
56 linux,input-no-autorepeat;
57 linux,input-wakeup;
58
59 row-gpios = <&gpx2 0 3 3 0
60 &gpx2 1 3 3 0>;
61
62 col-gpios = <&gpx1 0 3 0 0
63 &gpx1 1 3 0 0
64 &gpx1 2 3 0 0
65 &gpx1 3 3 0 0
66 &gpx1 4 3 0 0
67 &gpx1 5 3 0 0
68 &gpx1 6 3 0 0
69 &gpx1 7 3 0 0>;
70
71 key_1 {
72 keypad,row = <0>;
73 keypad,column = <3>;
74 linux,code = <2>;
75 };
76
77 key_2 {
78 keypad,row = <0>;
79 keypad,column = <4>;
80 linux,code = <3>;
81 };
82
83 key_3 {
84 keypad,row = <0>;
85 keypad,column = <5>;
86 linux,code = <4>;
87 };
88 };
diff --git a/Documentation/devicetree/bindings/input/tegra-kbc.txt b/Documentation/devicetree/bindings/input/tegra-kbc.txt
new file mode 100644
index 000000000000..5ecfa99089b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/tegra-kbc.txt
@@ -0,0 +1,18 @@
1* Tegra keyboard controller
2
3Required properties:
4- compatible: "nvidia,tegra20-kbc"
5
6Optional properties:
7- debounce-delay: delay in milliseconds per row scan for debouncing
8- repeat-delay: delay in milliseconds before repeat starts
9- ghost-filter: enable ghost filtering for this device
10- wakeup-source: configure keyboard as a wakeup source for suspend/resume
11
12Example:
13
14keyboard: keyboard {
15 compatible = "nvidia,tegra20-kbc";
16 reg = <0x7000e200 0x100>;
17 ghost-filter;
18};
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
new file mode 100644
index 000000000000..719f4dc58df7
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -0,0 +1,44 @@
1GPIO assisted NAND flash
2
3The GPIO assisted NAND flash uses a memory mapped interface to
4read/write the NAND commands and data and GPIO pins for the control
5signals.
6
7Required properties:
8- compatible : "gpio-control-nand"
9- reg : should specify localbus chip select and size used for the chip. The
10 resource describes the data bus connected to the NAND flash and all accesses
11 are made in native endianness.
12- #address-cells, #size-cells : Must be present if the device has sub-nodes
13 representing partitions.
14- gpios : specifies the gpio pins to control the NAND device. nwp is an
15 optional gpio and may be set to 0 if not present.
16
17Optional properties:
18- bank-width : Width (in bytes) of the device. If not present, the width
19 defaults to 1 byte.
20- chip-delay : chip dependent delay for transferring data from array to
21 read registers (tR). If not present then a default of 20us is used.
22- gpio-control-nand,io-sync-reg : A 64-bit physical address for a read
23 location used to guard against bus reordering with regard to accesses to
24 the GPIOs and the NAND flash data bus. If present, then after changing
25 GPIO state and before and after command byte writes, this register will be
26 read to ensure that the GPIO accesses have completed.
27
28Examples:
29
30gpio-nand@1,0 {
31 compatible = "gpio-control-nand";
32 reg = <1 0x0000 0x2>;
33 #address-cells = <1>;
34 #size-cells = <1>;
35 gpios = <&banka 1 0 /* rdy */
36 &banka 2 0 /* nce */
37 &banka 3 0 /* ale */
38 &banka 4 0 /* cle */
39 0 /* nwp */>;
40
41 partition@0 {
42 ...
43 };
44};
diff --git a/Documentation/devicetree/bindings/net/calxeda-xgmac.txt b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt
new file mode 100644
index 000000000000..411727a3f82d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt
@@ -0,0 +1,15 @@
1* Calxeda Highbank 10Gb XGMAC Ethernet
2
3Required properties:
4- compatible : Should be "calxeda,hb-xgmac"
5- reg : Address and length of the register set for the device
6- interrupts : Should contain 3 xgmac interrupts. The 1st is main interrupt.
7 The 2nd is pwr mgt interrupt. The 3rd is low power state interrupt.
8
9Example:
10
11ethernet@fff50000 {
12 compatible = "calxeda,hb-xgmac";
13 reg = <0xfff50000 0x1000>;
14 interrupts = <0 77 4 0 78 4 0 79 4>;
15};
diff --git a/Documentation/devicetree/bindings/net/can/cc770.txt b/Documentation/devicetree/bindings/net/can/cc770.txt
new file mode 100644
index 000000000000..77027bf6460a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/cc770.txt
@@ -0,0 +1,53 @@
1Memory mapped Bosch CC770 and Intel AN82527 CAN controller
2
3Note: The CC770 is a CAN controller from Bosch, which is 100%
4compatible with the old AN82527 from Intel, but with "bugs" being fixed.
5
6Required properties:
7
8- compatible : should be "bosch,cc770" for the CC770 and "intc,82527"
9 for the AN82527.
10
11- reg : should specify the chip select, address offset and size required
12 to map the registers of the controller. The size is usually 0x80.
13
14- interrupts : property with a value describing the interrupt source
15 (number and sensitivity) required for the controller.
16
17Optional properties:
18
19- bosch,external-clock-frequency : frequency of the external oscillator
20 clock in Hz. Note that the internal clock frequency used by the
21 controller is half of that value. If not specified, a default
22 value of 16000000 (16 MHz) is used.
23
24- bosch,clock-out-frequency : clock frequency in Hz on the CLKOUT pin.
25 If not specified or if the specified value is 0, the CLKOUT pin
26 will be disabled.
27
28- bosch,slew-rate : slew rate of the CLKOUT signal. If not specified,
29 a resonable value will be calculated.
30
31- bosch,disconnect-rx0-input : see data sheet.
32
33- bosch,disconnect-rx1-input : see data sheet.
34
35- bosch,disconnect-tx1-output : see data sheet.
36
37- bosch,polarity-dominant : see data sheet.
38
39- bosch,divide-memory-clock : see data sheet.
40
41- bosch,iso-low-speed-mux : see data sheet.
42
43For further information, please have a look at the CC770 or AN82527 data sheet.
44
45Examples:
46
47can@3,100 {
48 compatible = "bosch,cc770";
49 reg = <3 0x100 0x80>;
50 interrupts = <2 0>;
51 interrupt-parent = <&mpic>;
52 bosch,external-clock-frequency = <16000000>;
53};
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
new file mode 100644
index 000000000000..44afa0e5057d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -0,0 +1,25 @@
1* Cadence MACB/GEM Ethernet controller
2
3Required properties:
4- compatible: Should be "cdns,[<chip>-]{macb|gem}"
5 Use "cdns,at91sam9260-macb" for Atmel at91sam9260 and at91sam9263 SoCs.
6 Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb".
7 Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
8 the Cadence GEM, or the generic form: "cdns,gem".
9- reg: Address and length of the register set for the device
10- interrupts: Should contain macb interrupt
11- phy-mode: String, operation mode of the PHY interface.
12 Supported values are: "mii", "rmii", "gmii", "rgmii".
13
14Optional properties:
15- local-mac-address: 6 bytes, mac address
16
17Examples:
18
19 macb0: ethernet@fffc4000 {
20 compatible = "cdns,at32ap7000-macb";
21 reg = <0xfffc4000 0x4000>;
22 interrupts = <21>;
23 phy-mode = "rmii";
24 local-mac-address = [3a 0e 03 04 05 06];
25 };
diff --git a/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt b/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt
new file mode 100644
index 000000000000..5aeee53ff9f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt
@@ -0,0 +1,9 @@
1NVIDIA compliant embedded controller
2
3Required properties:
4- compatible : should be "nvidia,nvec".
5- reg : the iomem of the i2c slave controller
6- interrupts : the interrupt line of the i2c slave controller
7- clock-frequency : the frequency of the i2c bus
8- gpios : the gpio used for ec request
9- slave-addr: the i2c address of the slave controller
diff --git a/Documentation/devicetree/bindings/power_supply/olpc_battery.txt b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt
new file mode 100644
index 000000000000..c8901b3992d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt
@@ -0,0 +1,5 @@
1OLPC battery
2~~~~~~~~~~~~
3
4Required properties:
5 - compatible : "olpc,xo1-battery"
diff --git a/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt
new file mode 100644
index 000000000000..c40e8926facf
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt
@@ -0,0 +1,23 @@
1SBS sbs-battery
2~~~~~~~~~~
3
4Required properties :
5 - compatible : "sbs,sbs-battery"
6
7Optional properties :
8 - sbs,i2c-retry-count : The number of times to retry i2c transactions on i2c
9 IO failure.
10 - sbs,poll-retry-count : The number of times to try looking for new status
11 after an external change notification.
12 - sbs,battery-detect-gpios : The gpio which signals battery detection and
13 a flag specifying its polarity.
14
15Example:
16
17 bq20z75@b {
18 compatible = "sbs,sbs-battery";
19 reg = < 0xb >;
20 sbs,i2c-retry-count = <2>;
21 sbs,poll-retry-count = <10>;
22 sbs,battery-detect-gpios = <&gpio-controller 122 1>;
23 };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt
new file mode 100644
index 000000000000..b9a8a2bcfae7
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt
@@ -0,0 +1,163 @@
1Message unit node:
2
3For SRIO controllers that implement the message unit as part of the controller
4this node is required. For devices with RMAN this node should NOT exist. The
5node is composed of three types of sub-nodes ("fsl-srio-msg-unit",
6"fsl-srio-dbell-unit" and "fsl-srio-port-write-unit").
7
8See srio.txt for more details about generic SRIO controller details.
9
10 - compatible
11 Usage: required
12 Value type: <string>
13 Definition: Must include "fsl,srio-rmu-vX.Y", "fsl,srio-rmu".
14
15 The version X.Y should match the general SRIO controller's IP Block
16 revision register's Major(X) and Minor (Y) value.
17
18 - reg
19 Usage: required
20 Value type: <prop-encoded-array>
21 Definition: A standard property. Specifies the physical address and
22 length of the SRIO configuration registers for message units
23 and doorbell units.
24
25 - fsl,liodn
26 Usage: optional-but-recommended (for devices with PAMU)
27 Value type: <prop-encoded-array>
28 Definition: The logical I/O device number for the PAMU (IOMMU) to be
29 correctly configured for SRIO accesses. The property should
30 not exist on devices that do not support PAMU.
31
32 The LIODN value is associated with all RMU transactions
33 (msg-unit, doorbell, port-write).
34
35Sub-Nodes for RMU: The RMU node is composed of multiple sub-nodes that
36correspond to the actual sub-controllers in the RMU. The manual for a given
37SoC will detail which and how many of these sub-controllers are implemented.
38
39Message Unit:
40
41 - compatible
42 Usage: required
43 Value type: <string>
44 Definition: Must include "fsl,srio-msg-unit-vX.Y", "fsl,srio-msg-unit".
45
46 The version X.Y should match the general SRIO controller's IP Block
47 revision register's Major(X) and Minor (Y) value.
48
49 - reg
50 Usage: required
51 Value type: <prop-encoded-array>
52 Definition: A standard property. Specifies the physical address and
53 length of the SRIO configuration registers for message units
54 and doorbell units.
55
56 - interrupts
57 Usage: required
58 Value type: <prop-encoded-array>
59 Definition: Specifies the interrupts generated by this device. The
60 value of the interrupts property consists of one interrupt
61 specifier. The format of the specifier is defined by the
62 binding document describing the node's interrupt parent.
63
64 A pair of IRQs are specified in this property. The first
65 element is associated with the transmit (TX) interrupt and the
66 second element is associated with the receive (RX) interrupt.
67
68Doorbell Unit:
69
70 - compatible
71 Usage: required
72 Value type: <string>
73 Definition: Must include:
74 "fsl,srio-dbell-unit-vX.Y", "fsl,srio-dbell-unit"
75
76 The version X.Y should match the general SRIO controller's IP Block
77 revision register's Major(X) and Minor (Y) value.
78
79 - reg
80 Usage: required
81 Value type: <prop-encoded-array>
82 Definition: A standard property. Specifies the physical address and
83 length of the SRIO configuration registers for message units
84 and doorbell units.
85
86 - interrupts
87 Usage: required
88 Value type: <prop-encoded-array>
89 Definition: Specifies the interrupts generated by this device. The
90 value of the interrupts property consists of one interrupt
91 specifier. The format of the specifier is defined by the
92 binding document describing the node's interrupt parent.
93
94 A pair of IRQs are specified in this property. The first
95 element is associated with the transmit (TX) interrupt and the
96 second element is associated with the receive (RX) interrupt.
97
98Port-Write Unit:
99
100 - compatible
101 Usage: required
102 Value type: <string>
103 Definition: Must include:
104 "fsl,srio-port-write-unit-vX.Y", "fsl,srio-port-write-unit"
105
106 The version X.Y should match the general SRIO controller's IP Block
107 revision register's Major(X) and Minor (Y) value.
108
109 - reg
110 Usage: required
111 Value type: <prop-encoded-array>
112 Definition: A standard property. Specifies the physical address and
113 length of the SRIO configuration registers for message units
114 and doorbell units.
115
116 - interrupts
117 Usage: required
118 Value type: <prop-encoded-array>
119 Definition: Specifies the interrupts generated by this device. The
120 value of the interrupts property consists of one interrupt
121 specifier. The format of the specifier is defined by the
122 binding document describing the node's interrupt parent.
123
124 A single IRQ that handles port-write conditions is
125 specified by this property. (Typically shared with error).
126
127 Note: All other standard properties (see the ePAPR) are allowed
128 but are optional.
129
130Example:
131 rmu: rmu@d3000 {
132 compatible = "fsl,srio-rmu";
133 reg = <0xd3000 0x400>;
134 ranges = <0x0 0xd3000 0x400>;
135 fsl,liodn = <0xc8>;
136
137 message-unit@0 {
138 compatible = "fsl,srio-msg-unit";
139 reg = <0x0 0x100>;
140 interrupts = <
141 60 2 0 0 /* msg1_tx_irq */
142 61 2 0 0>;/* msg1_rx_irq */
143 };
144 message-unit@100 {
145 compatible = "fsl,srio-msg-unit";
146 reg = <0x100 0x100>;
147 interrupts = <
148 62 2 0 0 /* msg2_tx_irq */
149 63 2 0 0>;/* msg2_rx_irq */
150 };
151 doorbell-unit@400 {
152 compatible = "fsl,srio-dbell-unit";
153 reg = <0x400 0x80>;
154 interrupts = <
155 56 2 0 0 /* bell_outb_irq */
156 57 2 0 0>;/* bell_inb_irq */
157 };
158 port-write-unit@4e0 {
159 compatible = "fsl,srio-port-write-unit";
160 reg = <0x4e0 0x20>;
161 interrupts = <16 2 1 11>;
162 };
163 };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt
new file mode 100644
index 000000000000..b039bcbee134
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt
@@ -0,0 +1,103 @@
1* Freescale Serial RapidIO (SRIO) Controller
2
3RapidIO port node:
4Properties:
5 - compatible
6 Usage: required
7 Value type: <string>
8 Definition: Must include "fsl,srio" for IP blocks with IP Block
9 Revision Register (SRIO IPBRR1) Major ID equal to 0x01c0.
10
11 Optionally, a compatible string of "fsl,srio-vX.Y" where X is Major
12 version in IP Block Revision Register and Y is Minor version. If this
13 compatible is provided it should be ordered before "fsl,srio".
14
15 - reg
16 Usage: required
17 Value type: <prop-encoded-array>
18 Definition: A standard property. Specifies the physical address and
19 length of the SRIO configuration registers. The size should
20 be set to 0x11000.
21
22 - interrupts
23 Usage: required
24 Value type: <prop-encoded-array>
25 Definition: Specifies the interrupts generated by this device. The
26 value of the interrupts property consists of one interrupt
27 specifier. The format of the specifier is defined by the
28 binding document describing the node's interrupt parent.
29
30 A single IRQ that handles error conditions is specified by this
31 property. (Typically shared with port-write).
32
33 - fsl,srio-rmu-handle:
34 Usage: required if rmu node is defined
35 Value type: <phandle>
36 Definition: A single <phandle> value that points to the RMU.
37 (See srio-rmu.txt for more details on RMU node binding)
38
39Port Child Nodes: There should be a port child node for each port that exists in
40the controller. The ports are numbered starting at one (1) and should have
41the following properties:
42
43 - cell-index
44 Usage: required
45 Value type: <u32>
46 Definition: A standard property. Matches the port id.
47
48 - ranges
49 Usage: required if local access windows are present
50 Value type: <prop-encoded-array>
51 Definition: A standard property. Utilized to describe the memory mapped
52 IO space utilized by the controller. This corresponds to the
53 setting of the local access windows that are targeted to this
54 SRIO port.
55
56 - fsl,liodn
57 Usage: optional-but-recommended (for devices with PAMU)
58 Value type: <prop-encoded-array>
59 Definition: The logical I/O device number for the PAMU (IOMMU) to be
60 correctly configured for SRIO accesses. The property should
61 not exist on devices that do not support PAMU.
62
63 For HW (ie, the P4080) that only supports a LIODN for both
64 memory and maintenance transactions then a single LIODN is
65 represented in the property for both transactions.
66
67 For HW (ie, the P304x/P5020, etc) that supports an LIODN for
68 memory transactions and a unique LIODN for maintenance
69 transactions then a pair of LIODNs are represented in the
70 property. Within the pair, the first element represents the
71 LIODN associated with memory transactions and the second element
72 represents the LIODN associated with maintenance transactions
73 for the port.
74
75Note: All other standard properties (see ePAPR) are allowed but are optional.
76
77Example:
78
79 rapidio: rapidio@ffe0c0000 {
80 #address-cells = <2>;
81 #size-cells = <2>;
82 reg = <0xf 0xfe0c0000 0 0x11000>;
83 compatible = "fsl,srio";
84 interrupts = <16 2 1 11>; /* err_irq */
85 fsl,srio-rmu-handle = <&rmu>;
86 ranges;
87
88 port1 {
89 cell-index = <1>;
90 #address-cells = <2>;
91 #size-cells = <2>;
92 fsl,liodn = <34>;
93 ranges = <0 0 0xc 0x20000000 0 0x10000000>;
94 };
95
96 port2 {
97 cell-index = <2>;
98 #address-cells = <2>;
99 #size-cells = <2>;
100 fsl,liodn = <48>;
101 ranges = <0 0 0xc 0x30000000 0 0x10000000>;
102 };
103 };
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.txt b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt
new file mode 100644
index 000000000000..9cf57fd042d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt
@@ -0,0 +1,29 @@
1Fixed Voltage regulators
2
3Required properties:
4- compatible: Must be "regulator-fixed";
5
6Optional properties:
7- gpio: gpio to use for enable control
8- startup-delay-us: startup time in microseconds
9- enable-active-high: Polarity of GPIO is Active high
10If this property is missing, the default assumed is Active low.
11
12Any property defined as part of the core regulator
13binding, defined in regulator.txt, can also be used.
14However a fixed voltage regulator is expected to have the
15regulator-min-microvolt and regulator-max-microvolt
16to be the same.
17
18Example:
19
20 abc: fixedregulator@0 {
21 compatible = "regulator-fixed";
22 regulator-name = "fixed-supply";
23 regulator-min-microvolt = <1800000>;
24 regulator-max-microvolt = <1800000>;
25 gpio = <&gpio1 16 0>;
26 startup-delay-us = <70000>;
27 enable-active-high;
28 regulator-boot-on;
29 };
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
new file mode 100644
index 000000000000..5b7a408acdaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -0,0 +1,54 @@
1Voltage/Current Regulators
2
3Optional properties:
4- regulator-name: A string used as a descriptive name for regulator outputs
5- regulator-min-microvolt: smallest voltage consumers may set
6- regulator-max-microvolt: largest voltage consumers may set
7- regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops
8- regulator-min-microamp: smallest current consumers may set
9- regulator-max-microamp: largest current consumers may set
10- regulator-always-on: boolean, regulator should never be disabled
11- regulator-boot-on: bootloader/firmware enabled regulator
12- <name>-supply: phandle to the parent supply/regulator node
13
14Example:
15
16 xyzreg: regulator@0 {
17 regulator-min-microvolt = <1000000>;
18 regulator-max-microvolt = <2500000>;
19 regulator-always-on;
20 vin-supply = <&vin>;
21 };
22
23Regulator Consumers:
24Consumer nodes can reference one or more of its supplies/
25regulators using the below bindings.
26
27- <name>-supply: phandle to the regulator node
28
29These are the same bindings that a regulator in the above
30example used to reference its own supply, in which case
31it's just seen as a special case of a regulator being a
32consumer itself.
33
34Example of a consumer device node (mmc) referencing two
35regulators (twl_reg1 and twl_reg2),
36
37 twl_reg1: regulator@0 {
38 ...
39 ...
40 ...
41 };
42
43 twl_reg2: regulator@1 {
44 ...
45 ...
46 ...
47 };
48
49 mmc: mmc@0x0 {
50 ...
51 ...
52 vmmc-supply = <&twl_reg1>;
53 vmmcaux-supply = <&twl_reg2>;
54 };
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
new file mode 100644
index 000000000000..90ec45fd33ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
@@ -0,0 +1,20 @@
1* Samsung's S3C Real Time Clock controller
2
3Required properties:
4- compatible: should be one of the following.
5 * "samsung,s3c2410-rtc" - for controllers compatible with s3c2410 rtc.
6 * "samsung,s3c6410-rtc" - for controllers compatible with s3c6410 rtc.
7- reg: physical base address of the controller and length of memory mapped
8 region.
9- interrupts: Two interrupt numbers to the cpu should be specified. First
10 interrupt number is the rtc alarm interrupt and second interrupt number
11 is the rtc tick interrupt. The number of cells representing an interrupt
12 depends on the parent interrupt controller.
13
14Example:
15
16 rtc@10070000 {
17 compatible = "samsung,s3c6410-rtc";
18 reg = <0x10070000 0x100>;
19 interrupts = <44 0 45 0>;
20 };
diff --git a/Documentation/devicetree/bindings/rtc/twl-rtc.txt b/Documentation/devicetree/bindings/rtc/twl-rtc.txt
new file mode 100644
index 000000000000..596e0c97be7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/twl-rtc.txt
@@ -0,0 +1,12 @@
1* TI twl RTC
2
3The TWL family (twl4030/6030) contains a RTC.
4
5Required properties:
6- compatible : Should be "ti,twl4030-rtc"
7
8Examples:
9
10rtc@0 {
11 compatible = "ti,twl4030-rtc";
12};
diff --git a/Documentation/devicetree/bindings/serial/omap_serial.txt b/Documentation/devicetree/bindings/serial/omap_serial.txt
new file mode 100644
index 000000000000..342eedd10050
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/omap_serial.txt
@@ -0,0 +1,10 @@
1OMAP UART controller
2
3Required properties:
4- compatible : should be "ti,omap2-uart" for OMAP2 controllers
5- compatible : should be "ti,omap3-uart" for OMAP3 controllers
6- compatible : should be "ti,omap4-uart" for OMAP4 controllers
7- ti,hwmods : Must be "uart<n>", n being the instance number (1-based)
8
9Optional properties:
10- clock-frequency : frequency of the clock input to the UART
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.txt b/Documentation/devicetree/bindings/serial/samsung_uart.txt
new file mode 100644
index 000000000000..2c8a17cf5cb5
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/samsung_uart.txt
@@ -0,0 +1,14 @@
1* Samsung's UART Controller
2
3The Samsung's UART controller is used for interfacing SoC with serial communication
4devices.
5
6Required properties:
7- compatible: should be
8 - "samsung,exynos4210-uart", for UART's compatible with Exynos4210 uart ports.
9
10- reg: base physical address of the controller and length of memory mapped
11 region.
12
13- interrupts: interrupt number to the cpu. The interrupt specifier format depends
14 on the interrupt controller parent.
diff --git a/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt b/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt
new file mode 100644
index 000000000000..d5b0da8bf1d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt
@@ -0,0 +1,71 @@
1NVIDIA Tegra audio complex
2
3Required properties:
4- compatible : "nvidia,tegra-audio-wm8903"
5- nvidia,model : The user-visible name of this sound complex.
6- nvidia,audio-routing : A list of the connections between audio components.
7 Each entry is a pair of strings, the first being the connection's sink,
8 the second being the connection's source. Valid names for sources and
9 sinks are the WM8903's pins, and the jacks on the board:
10
11 WM8903 pins:
12
13 * IN1L
14 * IN1R
15 * IN2L
16 * IN2R
17 * IN3L
18 * IN3R
19 * DMICDAT
20 * HPOUTL
21 * HPOUTR
22 * LINEOUTL
23 * LINEOUTR
24 * LOP
25 * LON
26 * ROP
27 * RON
28 * MICBIAS
29
30 Board connectors:
31
32 * Headphone Jack
33 * Int Spk
34 * Mic Jack
35
36- nvidia,i2s-controller : The phandle of the Tegra I2S1 controller
37- nvidia,audio-codec : The phandle of the WM8903 audio codec
38
39Optional properties:
40- nvidia,spkr-en-gpios : The GPIO that enables the speakers
41- nvidia,hp-mute-gpios : The GPIO that mutes the headphones
42- nvidia,hp-det-gpios : The GPIO that detects when headphones are plugged in
43- nvidia,int-mic-en-gpios : The GPIO that enables the internal microphone
44- nvidia,ext-mic-en-gpios : The GPIO that enables the external microphone
45
46Example:
47
48sound {
49 compatible = "nvidia,tegra-audio-wm8903-harmony",
50 "nvidia,tegra-audio-wm8903";
51 nvidia,model = "tegra-wm8903-harmony";
52
53 nvidia,audio-routing =
54 "Headphone Jack", "HPOUTR",
55 "Headphone Jack", "HPOUTL",
56 "Int Spk", "ROP",
57 "Int Spk", "RON",
58 "Int Spk", "LOP",
59 "Int Spk", "LON",
60 "Mic Jack", "MICBIAS",
61 "IN1L", "Mic Jack";
62
63 nvidia,i2s-controller = <&i2s1>;
64 nvidia,audio-codec = <&wm8903>;
65
66 nvidia,spkr-en-gpios = <&codec 2 0>;
67 nvidia,hp-det-gpios = <&gpio 178 0>; /* gpio PW2 */
68 nvidia,int-mic-en-gpios = <&gpio 184 0>; /*gpio PX0 */
69 nvidia,ext-mic-en-gpios = <&gpio 185 0>; /* gpio PX1 */
70};
71
diff --git a/Documentation/devicetree/bindings/sound/tegra20-das.txt b/Documentation/devicetree/bindings/sound/tegra20-das.txt
new file mode 100644
index 000000000000..6de3a7ee4efb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tegra20-das.txt
@@ -0,0 +1,12 @@
1NVIDIA Tegra 20 DAS (Digital Audio Switch) controller
2
3Required properties:
4- compatible : "nvidia,tegra20-das"
5- reg : Should contain DAS registers location and length
6
7Example:
8
9das@70000c00 {
10 compatible = "nvidia,tegra20-das";
11 reg = <0x70000c00 0x80>;
12};
diff --git a/Documentation/devicetree/bindings/sound/tegra20-i2s.txt b/Documentation/devicetree/bindings/sound/tegra20-i2s.txt
new file mode 100644
index 000000000000..0df2b5c816e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tegra20-i2s.txt
@@ -0,0 +1,17 @@
1NVIDIA Tegra 20 I2S controller
2
3Required properties:
4- compatible : "nvidia,tegra20-i2s"
5- reg : Should contain I2S registers location and length
6- interrupts : Should contain I2S interrupt
7- nvidia,dma-request-selector : The Tegra DMA controller's phandle and
8 request selector for this I2S controller
9
10Example:
11
12i2s@70002800 {
13 compatible = "nvidia,tegra20-i2s";
14 reg = <0x70002800 0x200>;
15 interrupts = < 45 >;
16 nvidia,dma-request-selector = < &apbdma 2 >;
17};
diff --git a/Documentation/devicetree/bindings/sound/wm8903.txt b/Documentation/devicetree/bindings/sound/wm8903.txt
new file mode 100644
index 000000000000..f102cbc42694
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8903.txt
@@ -0,0 +1,50 @@
1WM8903 audio CODEC
2
3This device supports I2C only.
4
5Required properties:
6
7 - compatible : "wlf,wm8903"
8
9 - reg : the I2C address of the device.
10
11 - gpio-controller : Indicates this device is a GPIO controller.
12
13 - #gpio-cells : Should be two. The first cell is the pin number and the
14 second cell is used to specify optional parameters (currently unused).
15
16Optional properties:
17
18 - interrupts : The interrupt line the codec is connected to.
19
20 - micdet-cfg : Default register value for R6 (Mic Bias). If absent, the
21 default is 0.
22
23 - micdet-delay : The debounce delay for microphone detection in ms. If
24 absent, the default is 100.
25
26 - gpio-cfg : A list of GPIO configuration register values. The list must
27 be 5 entries long. If absent, no configuration of these registers is
28 performed. If any entry has the value 0xffffffff, that GPIO's
29 configuration will not be modified.
30
31Example:
32
33codec: wm8903@1a {
34 compatible = "wlf,wm8903";
35 reg = <0x1a>;
36 interrupts = < 347 >;
37
38 gpio-controller;
39 #gpio-cells = <2>;
40
41 micdet-cfg = <0>;
42 micdet-delay = <100>;
43 gpio-cfg = <
44 0x0600 /* DMIC_LR, output */
45 0x0680 /* DMIC_DAT, input */
46 0x0000 /* GPIO, output, low */
47 0x0200 /* Interrupt, output */
48 0x01a0 /* BCLK, input, active high */
49 >;
50};
diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt
new file mode 100644
index 000000000000..7a7eb1e7bda6
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8994.txt
@@ -0,0 +1,18 @@
1WM1811/WM8994/WM8958 audio CODEC
2
3These devices support both I2C and SPI (configured with pin strapping
4on the board).
5
6Required properties:
7
8 - compatible : "wlf,wm1811", "wlf,wm8994", "wlf,wm8958"
9
10 - reg : the I2C address of the device for I2C, the chip select
11 number for SPI.
12
13Example:
14
15codec: wm8994@1a {
16 compatible = "wlf,wm8994";
17 reg = <0x1a>;
18};
diff --git a/Documentation/devicetree/bindings/usb/tegra-usb.txt b/Documentation/devicetree/bindings/usb/tegra-usb.txt
new file mode 100644
index 000000000000..035d63d5646d
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/tegra-usb.txt
@@ -0,0 +1,13 @@
1Tegra SOC USB controllers
2
3The device node for a USB controller that is part of a Tegra
4SOC is as described in the document "Open Firmware Recommended
5Practice : Universal Serial Bus" with the following modifications
6and additions :
7
8Required properties :
9 - compatible : Should be "nvidia,tegra20-ehci" for USB controllers
10 used in host mode.
11 - phy_type : Should be one of "ulpi" or "utmi".
12 - nvidia,vbus-gpio : If present, specifies a gpio that needs to be
13 activated for the bus to be powered.
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 874921e97802..ecc6a6cd26c1 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -8,7 +8,9 @@ amcc Applied Micro Circuits Corporation (APM, formally AMCC)
8apm Applied Micro Circuits Corporation (APM) 8apm Applied Micro Circuits Corporation (APM)
9arm ARM Ltd. 9arm ARM Ltd.
10atmel Atmel Corporation 10atmel Atmel Corporation
11cavium Cavium, Inc.
11chrp Common Hardware Reference Platform 12chrp Common Hardware Reference Platform
13cortina Cortina Systems, Inc.
12dallas Maxim Integrated Products (formerly Dallas Semiconductor) 14dallas Maxim Integrated Products (formerly Dallas Semiconductor)
13denx Denx Software Engineering 15denx Denx Software Engineering
14epson Seiko Epson Corp. 16epson Seiko Epson Corp.
@@ -32,10 +34,13 @@ powervr Imagination Technologies
32qcom Qualcomm, Inc. 34qcom Qualcomm, Inc.
33ramtron Ramtron International 35ramtron Ramtron International
34samsung Samsung Semiconductor 36samsung Samsung Semiconductor
37sbs Smart Battery System
35schindler Schindler 38schindler Schindler
36sil Silicon Image 39sil Silicon Image
37simtek 40simtek
38sirf SiRF Technology, Inc. 41sirf SiRF Technology, Inc.
42st STMicroelectronics
39stericsson ST-Ericsson 43stericsson ST-Ericsson
40ti Texas Instruments 44ti Texas Instruments
45wlf Wolfson Microelectronics
41xlnx Xilinx 46xlnx Xilinx
diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt
new file mode 100644
index 000000000000..3f682889068b
--- /dev/null
+++ b/Documentation/digsig.txt
@@ -0,0 +1,96 @@
1Digital Signature Verification API
2
3CONTENTS
4
51. Introduction
62. API
73. User-space utilities
8
9
101. Introduction
11
12The digital signature verification API provides a method to verify digital signatures.
13Currently digital signatures are used by the IMA/EVM integrity protection subsystem.
14
15Digital signature verification is implemented using cut-down kernel port of
16GnuPG multi-precision integers (MPI) library. The kernel port provides
17memory allocation errors handling, has been refactored according to kernel
18coding style, and checkpatch.pl reported errors and warnings have been fixed.
19
20Public key and signature consist of header and MPIs.
21
22struct pubkey_hdr {
23 uint8_t version; /* key format version */
24 time_t timestamp; /* key made, always 0 for now */
25 uint8_t algo;
26 uint8_t nmpi;
27 char mpi[0];
28} __packed;
29
30struct signature_hdr {
31 uint8_t version; /* signature format version */
32 time_t timestamp; /* signature made */
33 uint8_t algo;
34 uint8_t hash;
35 uint8_t keyid[8];
36 uint8_t nmpi;
37 char mpi[0];
38} __packed;
39
40keyid is equal to SHA1[12-19] over the total key content.
41The signature header is used as an input to generate a signature.
42This approach ensures that the key or signature header cannot be changed.
43It protects the timestamp from being changed and can be used for rollback
44protection.
45
462. API
47
48API currently includes only 1 function:
49
50 digsig_verify() - digital signature verification with public key
51
52
53/**
54 * digsig_verify() - digital signature verification with public key
55 * @keyring: keyring to search key in
56 * @sig: digital signature
57 * @siglen: length of the signature
58 * @data: data
59 * @datalen: length of the data
60 * @return: 0 on success, -EINVAL otherwise
61 *
62 * Verifies data integrity against digital signature.
63 * Currently only RSA is supported.
64 * Normally hash of the content is used as a data for this function.
65 *
66 */
67int digsig_verify(struct key *keyring, const char *sig, int siglen,
68 const char *data, int datalen);
69
703. User-space utilities
71
72The signing and key management utilities evm-utils provide functionality
73to generate signatures and to load keys into the kernel keyring.
74Keys can be in PEM or converted to the kernel format.
75When the key is added to the kernel keyring, the keyid defines the name
76of the key: 5D2B05FC633EE3E8 in the example below.
77
78Here is example output of the keyctl utility.
79
80$ keyctl show
81Session Keyring
82 -3 --alswrv 0 0 keyring: _ses
83603976250 --alswrv 0 -1 \_ keyring: _uid.0
84817777377 --alswrv 0 0 \_ user: kmk
85891974900 --alswrv 0 0 \_ encrypted: evm-key
86170323636 --alswrv 0 0 \_ keyring: _module
87548221616 --alswrv 0 0 \_ keyring: _ima
88128198054 --alswrv 0 0 \_ keyring: _evm
89
90$ keyctl list 128198054
911 key in keyring:
92620789745: --alswrv 0 0 user: 5D2B05FC633EE3E8
93
94
95Dmitry Kasatkin
9606.10.2011
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
new file mode 100644
index 000000000000..510eab32f392
--- /dev/null
+++ b/Documentation/dma-buf-sharing.txt
@@ -0,0 +1,224 @@
1 DMA Buffer Sharing API Guide
2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3
4 Sumit Semwal
5 <sumit dot semwal at linaro dot org>
6 <sumit dot semwal at ti dot com>
7
8This document serves as a guide to device-driver writers on what the dma-buf
9buffer sharing API is, and how to use it for exporting and using shared buffers.
10
11Any device driver which wishes to be a part of DMA buffer sharing, can do so as
12either the 'exporter' of buffers, or the 'user' of buffers.
13
14Say a driver A wants to use buffers created by driver B; then we call B the
15exporter, and A the buffer-user.
16
17The exporter
18- implements and manages operations[1] for the buffer
19- allows other users to share the buffer by using dma_buf sharing APIs,
20- manages the details of buffer allocation,
21- decides about the actual backing storage where this allocation happens,
22- takes care of any migration of scatterlist - for all (shared) users of this
23 buffer,
24
25The buffer-user
26- is one of (many) sharing users of the buffer.
27- doesn't need to worry about how the buffer is allocated, or where.
28- needs a mechanism to get access to the scatterlist that makes up this buffer
29 in memory, mapped into its own address space, so it can access the same area
30 of memory.
31
32*IMPORTANT*: [see https://lkml.org/lkml/2011/12/20/211 for more details]
33For this first version, a buffer shared using the dma_buf sharing API:
34- *may* be exported to user space using "mmap" *ONLY* by exporter, outside of
35 this framework.
36- may be used *ONLY* by importers that do not need CPU access to the buffer.
37
38The dma_buf buffer sharing API usage contains the following steps:
39
401. Exporter announces that it wishes to export a buffer
412. Userspace gets the file descriptor associated with the exported buffer, and
42 passes it around to potential buffer-users based on use case
433. Each buffer-user 'connects' itself to the buffer
444. When needed, buffer-user requests access to the buffer from exporter
455. When finished with its use, the buffer-user notifies end-of-DMA to exporter
466. When buffer-user is done using this buffer completely, it 'disconnects'
47 itself from the buffer.
48
49
501. Exporter's announcement of buffer export
51
52 The buffer exporter announces its wish to export a buffer. In this, it
53 connects its own private buffer data, provides implementation for operations
54 that can be performed on the exported dma_buf, and flags for the file
55 associated with this buffer.
56
57 Interface:
58 struct dma_buf *dma_buf_export(void *priv, struct dma_buf_ops *ops,
59 size_t size, int flags)
60
61 If this succeeds, dma_buf_export allocates a dma_buf structure, and returns a
62 pointer to the same. It also associates an anonymous file with this buffer,
63 so it can be exported. On failure to allocate the dma_buf object, it returns
64 NULL.
65
662. Userspace gets a handle to pass around to potential buffer-users
67
68 Userspace entity requests a file-descriptor (fd) which is a handle to the
69 anonymous file associated with the buffer. It can then share the fd with other
70 drivers and/or processes.
71
72 Interface:
73 int dma_buf_fd(struct dma_buf *dmabuf)
74
75 This API installs an fd for the anonymous file associated with this buffer;
76 returns either 'fd', or error.
77
783. Each buffer-user 'connects' itself to the buffer
79
80 Each buffer-user now gets a reference to the buffer, using the fd passed to
81 it.
82
83 Interface:
84 struct dma_buf *dma_buf_get(int fd)
85
86 This API will return a reference to the dma_buf, and increment refcount for
87 it.
88
89 After this, the buffer-user needs to attach its device with the buffer, which
90 helps the exporter to know of device buffer constraints.
91
92 Interface:
93 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
94 struct device *dev)
95
96 This API returns reference to an attachment structure, which is then used
97 for scatterlist operations. It will optionally call the 'attach' dma_buf
98 operation, if provided by the exporter.
99
100 The dma-buf sharing framework does the bookkeeping bits related to managing
101 the list of all attachments to a buffer.
102
103Until this stage, the buffer-exporter has the option to choose not to actually
104allocate the backing storage for this buffer, but wait for the first buffer-user
105to request use of buffer for allocation.
106
107
1084. When needed, buffer-user requests access to the buffer
109
110 Whenever a buffer-user wants to use the buffer for any DMA, it asks for
111 access to the buffer using dma_buf_map_attachment API. At least one attach to
112 the buffer must have happened before map_dma_buf can be called.
113
114 Interface:
115 struct sg_table * dma_buf_map_attachment(struct dma_buf_attachment *,
116 enum dma_data_direction);
117
118 This is a wrapper to dma_buf->ops->map_dma_buf operation, which hides the
119 "dma_buf->ops->" indirection from the users of this interface.
120
121 In struct dma_buf_ops, map_dma_buf is defined as
122 struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
123 enum dma_data_direction);
124
125 It is one of the buffer operations that must be implemented by the exporter.
126 It should return the sg_table containing scatterlist for this buffer, mapped
127 into caller's address space.
128
129 If this is being called for the first time, the exporter can now choose to
130 scan through the list of attachments for this buffer, collate the requirements
131 of the attached devices, and choose an appropriate backing storage for the
132 buffer.
133
134 Based on enum dma_data_direction, it might be possible to have multiple users
135 accessing at the same time (for reading, maybe), or any other kind of sharing
136 that the exporter might wish to make available to buffer-users.
137
138 map_dma_buf() operation can return -EINTR if it is interrupted by a signal.
139
140
1415. When finished, the buffer-user notifies end-of-DMA to exporter
142
143 Once the DMA for the current buffer-user is over, it signals 'end-of-DMA' to
144 the exporter using the dma_buf_unmap_attachment API.
145
146 Interface:
147 void dma_buf_unmap_attachment(struct dma_buf_attachment *,
148 struct sg_table *);
149
150 This is a wrapper to dma_buf->ops->unmap_dma_buf() operation, which hides the
151 "dma_buf->ops->" indirection from the users of this interface.
152
153 In struct dma_buf_ops, unmap_dma_buf is defined as
154 void (*unmap_dma_buf)(struct dma_buf_attachment *, struct sg_table *);
155
156 unmap_dma_buf signifies the end-of-DMA for the attachment provided. Like
157 map_dma_buf, this API also must be implemented by the exporter.
158
159
1606. When buffer-user is done using this buffer, it 'disconnects' itself from the
161 buffer.
162
163 After the buffer-user has no more interest in using this buffer, it should
164 disconnect itself from the buffer:
165
166 - it first detaches itself from the buffer.
167
168 Interface:
169 void dma_buf_detach(struct dma_buf *dmabuf,
170 struct dma_buf_attachment *dmabuf_attach);
171
172 This API removes the attachment from the list in dmabuf, and optionally calls
173 dma_buf->ops->detach(), if provided by exporter, for any housekeeping bits.
174
175 - Then, the buffer-user returns the buffer reference to exporter.
176
177 Interface:
178 void dma_buf_put(struct dma_buf *dmabuf);
179
180 This API then reduces the refcount for this buffer.
181
182 If, as a result of this call, the refcount becomes 0, the 'release' file
183 operation related to this fd is called. It calls the dmabuf->ops->release()
184 operation in turn, and frees the memory allocated for dmabuf when exported.
185
186NOTES:
187- Importance of attach-detach and {map,unmap}_dma_buf operation pairs
188 The attach-detach calls allow the exporter to figure out backing-storage
189 constraints for the currently-interested devices. This allows preferential
190 allocation, and/or migration of pages across different types of storage
191 available, if possible.
192
193 Bracketing of DMA access with {map,unmap}_dma_buf operations is essential
194 to allow just-in-time backing of storage, and migration mid-way through a
195 use-case.
196
197- Migration of backing storage if needed
198 If after
199 - at least one map_dma_buf has happened,
200 - and the backing storage has been allocated for this buffer,
201 another new buffer-user intends to attach itself to this buffer, it might
202 be allowed, if possible for the exporter.
203
204 In case it is allowed by the exporter:
205 if the new buffer-user has stricter 'backing-storage constraints', and the
206 exporter can handle these constraints, the exporter can just stall on the
207 map_dma_buf until all outstanding access is completed (as signalled by
208 unmap_dma_buf).
209 Once all users have finished accessing and have unmapped this buffer, the
210 exporter could potentially move the buffer to the stricter backing-storage,
211 and then allow further {map,unmap}_dma_buf operations from any buffer-user
212 from the migrated backing-storage.
213
214 If the exporter cannot fulfil the backing-storage constraints of the new
215 buffer-user device as requested, dma_buf_attach() would return an error to
216 denote non-compatibility of the new buffer-sharing request with the current
217 buffer.
218
219 If the exporter chooses not to allow an attach() operation once a
220 map_dma_buf() API has been called, it simply returns an error.
221
222References:
223[1] struct dma_buf_ops in include/linux/dma-buf.h
224[2] All interfaces mentioned above defined in include/linux/dma-buf.h
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index dfa6fc6e4b28..0c083c5c2faa 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -66,7 +66,6 @@ GRTAGS
66GSYMS 66GSYMS
67GTAGS 67GTAGS
68Image 68Image
69Kerntypes
70Module.markers 69Module.markers
71Module.symvers 70Module.symvers
72PENDING 71PENDING
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index d79aead9418b..10c64c8a13d4 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -262,6 +262,7 @@ IOMAP
262 devm_ioremap() 262 devm_ioremap()
263 devm_ioremap_nocache() 263 devm_ioremap_nocache()
264 devm_iounmap() 264 devm_iounmap()
265 devm_request_and_ioremap() : checks resource, requests region, ioremaps
265 pcim_iomap() 266 pcim_iomap()
266 pcim_iounmap() 267 pcim_iounmap()
267 pcim_iomap_table() : array of mapped addresses indexed by BAR 268 pcim_iomap_table() : array of mapped addresses indexed by BAR
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3d849122b5b1..d49c2ec72d12 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -85,17 +85,6 @@ Who: Robin Getz <rgetz@blackfin.uclinux.org> & Matt Mackall <mpm@selenic.com>
85 85
86--------------------------- 86---------------------------
87 87
88What: Deprecated snapshot ioctls
89When: 2.6.36
90
91Why: The ioctls in kernel/power/user.c were marked as deprecated long time
92 ago. Now they notify users about that so that they need to replace
93 their userspace. After some more time, remove them completely.
94
95Who: Jiri Slaby <jirislaby@gmail.com>
96
97---------------------------
98
99What: The ieee80211_regdom module parameter 88What: The ieee80211_regdom module parameter
100When: March 2010 / desktop catchup 89When: March 2010 / desktop catchup
101 90
@@ -263,8 +252,7 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org>
263 252
264What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS 253What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS
265 (in net/core/net-sysfs.c) 254 (in net/core/net-sysfs.c)
266When: After the only user (hal) has seen a release with the patches 255When: 3.5
267 for enough time, probably some time in 2010.
268Why: Over 1K .text/.data size reduction, data is available in other 256Why: Over 1K .text/.data size reduction, data is available in other
269 ways (ioctls) 257 ways (ioctls)
270Who: Johannes Berg <johannes@sipsolutions.net> 258Who: Johannes Berg <johannes@sipsolutions.net>
@@ -362,15 +350,6 @@ Who: anybody or Florian Mickler <florian@mickler.org>
362 350
363---------------------------- 351----------------------------
364 352
365What: KVM paravirt mmu host support
366When: January 2011
367Why: The paravirt mmu host support is slower than non-paravirt mmu, both
368 on newer and older hardware. It is already not exposed to the guest,
369 and kept only for live migration purposes.
370Who: Avi Kivity <avi@redhat.com>
371
372----------------------------
373
374What: iwlwifi 50XX module parameters 353What: iwlwifi 50XX module parameters
375When: 3.0 354When: 3.0
376Why: The "..50" modules parameters were used to configure 5000 series and 355Why: The "..50" modules parameters were used to configure 5000 series and
@@ -535,6 +514,20 @@ Why: In 3.0, we can now autodetect internal 3G device and already have
535 information log when acer-wmi initial. 514 information log when acer-wmi initial.
536Who: Lee, Chun-Yi <jlee@novell.com> 515Who: Lee, Chun-Yi <jlee@novell.com>
537 516
517---------------------------
518
519What: /sys/devices/platform/_UDC_/udc/_UDC_/is_dualspeed file and
520 is_dualspeed line in /sys/devices/platform/ci13xxx_*/udc/device file.
521When: 3.8
522Why: The is_dualspeed file is superseded by maximum_speed in the same
523 directory and is_dualspeed line in device file is superseded by
524 max_speed line in the same file.
525
526 The maximum_speed/max_speed specifies maximum speed supported by UDC.
527 To check if dualspeed is supported, check if the value is >= 3.
528 Various possible speeds are defined in <linux/usb/ch9.h>.
529Who: Michal Nazarewicz <mina86@mina86.com>
530
538---------------------------- 531----------------------------
539 532
540What: The XFS nodelaylog mount option 533What: The XFS nodelaylog mount option
@@ -551,3 +544,15 @@ When: 3.5
551Why: The iwlagn module has been renamed iwlwifi. The alias will be around 544Why: The iwlagn module has been renamed iwlwifi. The alias will be around
552 for backward compatibility for several cycles and then dropped. 545 for backward compatibility for several cycles and then dropped.
553Who: Don Fry <donald.h.fry@intel.com> 546Who: Don Fry <donald.h.fry@intel.com>
547
548----------------------------
549
550What: pci_scan_bus_parented()
551When: 3.5
552Why: The pci_scan_bus_parented() interface creates a new root bus. The
553 bus is created with default resources (ioport_resource and
554 iomem_resource) that are always wrong, so we rely on arch code to
555 correct them later. Callers of pci_scan_bus_parented() should
556 convert to using pci_scan_root_bus() so they can supply a list of
557 bus resources when the bus is created.
558Who: Bjorn Helgaas <bhelgaas@google.com>
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d819ba16a0c7..4fca82e5276e 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -37,15 +37,15 @@ d_manage: no no yes (ref-walk) maybe
37 37
38--------------------------- inode_operations --------------------------- 38--------------------------- inode_operations ---------------------------
39prototypes: 39prototypes:
40 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 40 int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *);
41 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid 41 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid
42ata *); 42ata *);
43 int (*link) (struct dentry *,struct inode *,struct dentry *); 43 int (*link) (struct dentry *,struct inode *,struct dentry *);
44 int (*unlink) (struct inode *,struct dentry *); 44 int (*unlink) (struct inode *,struct dentry *);
45 int (*symlink) (struct inode *,struct dentry *,const char *); 45 int (*symlink) (struct inode *,struct dentry *,const char *);
46 int (*mkdir) (struct inode *,struct dentry *,int); 46 int (*mkdir) (struct inode *,struct dentry *,umode_t);
47 int (*rmdir) (struct inode *,struct dentry *); 47 int (*rmdir) (struct inode *,struct dentry *);
48 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 48 int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
49 int (*rename) (struct inode *, struct dentry *, 49 int (*rename) (struct inode *, struct dentry *,
50 struct inode *, struct dentry *); 50 struct inode *, struct dentry *);
51 int (*readlink) (struct dentry *, char __user *,int); 51 int (*readlink) (struct dentry *, char __user *,int);
@@ -117,7 +117,7 @@ prototypes:
117 int (*statfs) (struct dentry *, struct kstatfs *); 117 int (*statfs) (struct dentry *, struct kstatfs *);
118 int (*remount_fs) (struct super_block *, int *, char *); 118 int (*remount_fs) (struct super_block *, int *, char *);
119 void (*umount_begin) (struct super_block *); 119 void (*umount_begin) (struct super_block *);
120 int (*show_options)(struct seq_file *, struct vfsmount *); 120 int (*show_options)(struct seq_file *, struct dentry *);
121 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 121 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
122 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 122 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
123 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 123 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index dd57bb6bb390..b40fec9d3f53 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -192,7 +192,7 @@ attribute value uses the store_attribute() method.
192 struct configfs_attribute { 192 struct configfs_attribute {
193 char *ca_name; 193 char *ca_name;
194 struct module *ca_owner; 194 struct module *ca_owner;
195 mode_t ca_mode; 195 umode_t ca_mode;
196 }; 196 };
197 197
198When a config_item wants an attribute to appear as a file in the item's 198When a config_item wants an attribute to appear as a file in the item's
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 742cc06e138f..6872c91bce35 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -35,7 +35,7 @@ described below will work.
35 35
36The most general way to create a file within a debugfs directory is with: 36The most general way to create a file within a debugfs directory is with:
37 37
38 struct dentry *debugfs_create_file(const char *name, mode_t mode, 38 struct dentry *debugfs_create_file(const char *name, umode_t mode,
39 struct dentry *parent, void *data, 39 struct dentry *parent, void *data,
40 const struct file_operations *fops); 40 const struct file_operations *fops);
41 41
@@ -53,13 +53,13 @@ actually necessary; the debugfs code provides a number of helper functions
53for simple situations. Files containing a single integer value can be 53for simple situations. Files containing a single integer value can be
54created with any of: 54created with any of:
55 55
56 struct dentry *debugfs_create_u8(const char *name, mode_t mode, 56 struct dentry *debugfs_create_u8(const char *name, umode_t mode,
57 struct dentry *parent, u8 *value); 57 struct dentry *parent, u8 *value);
58 struct dentry *debugfs_create_u16(const char *name, mode_t mode, 58 struct dentry *debugfs_create_u16(const char *name, umode_t mode,
59 struct dentry *parent, u16 *value); 59 struct dentry *parent, u16 *value);
60 struct dentry *debugfs_create_u32(const char *name, mode_t mode, 60 struct dentry *debugfs_create_u32(const char *name, umode_t mode,
61 struct dentry *parent, u32 *value); 61 struct dentry *parent, u32 *value);
62 struct dentry *debugfs_create_u64(const char *name, mode_t mode, 62 struct dentry *debugfs_create_u64(const char *name, umode_t mode,
63 struct dentry *parent, u64 *value); 63 struct dentry *parent, u64 *value);
64 64
65These files support both reading and writing the given value; if a specific 65These files support both reading and writing the given value; if a specific
@@ -67,13 +67,13 @@ file should not be written to, simply set the mode bits accordingly. The
67values in these files are in decimal; if hexadecimal is more appropriate, 67values in these files are in decimal; if hexadecimal is more appropriate,
68the following functions can be used instead: 68the following functions can be used instead:
69 69
70 struct dentry *debugfs_create_x8(const char *name, mode_t mode, 70 struct dentry *debugfs_create_x8(const char *name, umode_t mode,
71 struct dentry *parent, u8 *value); 71 struct dentry *parent, u8 *value);
72 struct dentry *debugfs_create_x16(const char *name, mode_t mode, 72 struct dentry *debugfs_create_x16(const char *name, umode_t mode,
73 struct dentry *parent, u16 *value); 73 struct dentry *parent, u16 *value);
74 struct dentry *debugfs_create_x32(const char *name, mode_t mode, 74 struct dentry *debugfs_create_x32(const char *name, umode_t mode,
75 struct dentry *parent, u32 *value); 75 struct dentry *parent, u32 *value);
76 struct dentry *debugfs_create_x64(const char *name, mode_t mode, 76 struct dentry *debugfs_create_x64(const char *name, umode_t mode,
77 struct dentry *parent, u64 *value); 77 struct dentry *parent, u64 *value);
78 78
79These functions are useful as long as the developer knows the size of the 79These functions are useful as long as the developer knows the size of the
@@ -81,7 +81,7 @@ value to be exported. Some types can have different widths on different
81architectures, though, complicating the situation somewhat. There is a 81architectures, though, complicating the situation somewhat. There is a
82function meant to help out in one special case: 82function meant to help out in one special case:
83 83
84 struct dentry *debugfs_create_size_t(const char *name, mode_t mode, 84 struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
85 struct dentry *parent, 85 struct dentry *parent,
86 size_t *value); 86 size_t *value);
87 87
@@ -90,21 +90,22 @@ a variable of type size_t.
90 90
91Boolean values can be placed in debugfs with: 91Boolean values can be placed in debugfs with:
92 92
93 struct dentry *debugfs_create_bool(const char *name, mode_t mode, 93 struct dentry *debugfs_create_bool(const char *name, umode_t mode,
94 struct dentry *parent, u32 *value); 94 struct dentry *parent, u32 *value);
95 95
96A read on the resulting file will yield either Y (for non-zero values) or 96A read on the resulting file will yield either Y (for non-zero values) or
97N, followed by a newline. If written to, it will accept either upper- or 97N, followed by a newline. If written to, it will accept either upper- or
98lower-case values, or 1 or 0. Any other input will be silently ignored. 98lower-case values, or 1 or 0. Any other input will be silently ignored.
99 99
100Finally, a block of arbitrary binary data can be exported with: 100Another option is exporting a block of arbitrary binary data, with
101this structure and function:
101 102
102 struct debugfs_blob_wrapper { 103 struct debugfs_blob_wrapper {
103 void *data; 104 void *data;
104 unsigned long size; 105 unsigned long size;
105 }; 106 };
106 107
107 struct dentry *debugfs_create_blob(const char *name, mode_t mode, 108 struct dentry *debugfs_create_blob(const char *name, umode_t mode,
108 struct dentry *parent, 109 struct dentry *parent,
109 struct debugfs_blob_wrapper *blob); 110 struct debugfs_blob_wrapper *blob);
110 111
@@ -115,6 +116,35 @@ can be used to export binary information, but there does not appear to be
115any code which does so in the mainline. Note that all files created with 116any code which does so in the mainline. Note that all files created with
116debugfs_create_blob() are read-only. 117debugfs_create_blob() are read-only.
117 118
119If you want to dump a block of registers (something that happens quite
120often during development, even if little such code reaches mainline),
121debugfs offers two functions: one to make a registers-only file, and
122another to insert a register block in the middle of another sequential
123file.
124
125 struct debugfs_reg32 {
126 char *name;
127 unsigned long offset;
128 };
129
130 struct debugfs_regset32 {
131 struct debugfs_reg32 *regs;
132 int nregs;
133 void __iomem *base;
134 };
135
136 struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
137 struct dentry *parent,
138 struct debugfs_regset32 *regset);
139
140 int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
141 int nregs, void __iomem *base, char *prefix);
142
143The "base" argument may be 0, but you may want to build the reg32 array
144using __stringify, and a number of register names (macros) are actually
145byte offsets over a base for the register block.
146
147
118There are a couple of other directory-oriented helper functions: 148There are a couple of other directory-oriented helper functions:
119 149
120 struct dentry *debugfs_rename(struct dentry *old_dir, 150 struct dentry *debugfs_rename(struct dentry *old_dir,
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 4917cf24a5e0..10ec4639f152 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
581 behaviour may change in the future as it is 581 behaviour may change in the future as it is
582 not necessary and has been done this way only 582 not necessary and has been done this way only
583 for sake of simplicity. 583 for sake of simplicity.
584
585 EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number
586 of blocks of resized filesystem is passed in via
587 64 bit integer argument. The kernel allocates
588 bitmaps and inode table, the userspace tool thus
589 just passes the new number of blocks.
590
584.............................................................................. 591..............................................................................
585 592
586References 593References
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0ec91f03422e..a76a26a1db8a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -41,6 +41,8 @@ Table of Contents
41 3.5 /proc/<pid>/mountinfo - Information about mounts 41 3.5 /proc/<pid>/mountinfo - Information about mounts
42 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm 42 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
43 43
44 4 Configuring procfs
45 4.1 Mount options
44 46
45------------------------------------------------------------------------------ 47------------------------------------------------------------------------------
46Preface 48Preface
@@ -305,6 +307,9 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
305 blkio_ticks time spent waiting for block IO 307 blkio_ticks time spent waiting for block IO
306 gtime guest time of the task in jiffies 308 gtime guest time of the task in jiffies
307 cgtime guest time of the task children in jiffies 309 cgtime guest time of the task children in jiffies
310 start_data address above which program data+bss is placed
311 end_data address below which program data+bss is placed
312 start_brk address above which program heap can be expanded with brk()
308.............................................................................. 313..............................................................................
309 314
310The /proc/PID/maps file containing the currently mapped memory regions and 315The /proc/PID/maps file containing the currently mapped memory regions and
@@ -1542,3 +1547,40 @@ a task to set its own or one of its thread siblings comm value. The comm value
1542is limited in size compared to the cmdline value, so writing anything longer 1547is limited in size compared to the cmdline value, so writing anything longer
1543than the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated 1548than the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
1544comm value. 1549comm value.
1550
1551
1552------------------------------------------------------------------------------
1553Configuring procfs
1554------------------------------------------------------------------------------
1555
15564.1 Mount options
1557---------------------
1558
1559The following mount options are supported:
1560
1561 hidepid= Set /proc/<pid>/ access mode.
1562 gid= Set the group authorized to learn processes information.
1563
1564hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
1565(default).
1566
1567hidepid=1 means users may not access any /proc/<pid>/ directories but their
1568own. Sensitive files like cmdline, sched*, status are now protected against
1569other users. This makes it impossible to learn whether any user runs a
1570specific program (given the program doesn't reveal itself by its behaviour).
1571As an additional bonus, as /proc/<pid>/cmdline is inaccessible to other users,
1572poorly written programs passing sensitive information via program arguments are
1573now protected against local eavesdroppers.
1574
1575hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
1576users. It doesn't mean that it hides a fact whether a process with a specific
1577pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
1578but it hides process' uid and gid, which may be learned by stat()'ing
1579/proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering
1580information about running processes, whether some daemon runs with elevated
1581privileges, whether other user runs some sensitive program, whether other users
1582run any program at all, etc.
1583
1584gid= defines a group authorized to learn processes information otherwise
1585prohibited by hidepid=. If you use some daemon like identd which needs to learn
1586information about processes, just add identd to this group.
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 07235caec22c..a6619b7064b9 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -70,7 +70,7 @@ An attribute definition is simply:
70struct attribute { 70struct attribute {
71 char * name; 71 char * name;
72 struct module *owner; 72 struct module *owner;
73 mode_t mode; 73 umode_t mode;
74}; 74};
75 75
76 76
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 43cbd0821721..3d9393b845b8 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -225,7 +225,7 @@ struct super_operations {
225 void (*clear_inode) (struct inode *); 225 void (*clear_inode) (struct inode *);
226 void (*umount_begin) (struct super_block *); 226 void (*umount_begin) (struct super_block *);
227 227
228 int (*show_options)(struct seq_file *, struct vfsmount *); 228 int (*show_options)(struct seq_file *, struct dentry *);
229 229
230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
@@ -341,14 +341,14 @@ This describes how the VFS can manipulate an inode in your
341filesystem. As of kernel 2.6.22, the following members are defined: 341filesystem. As of kernel 2.6.22, the following members are defined:
342 342
343struct inode_operations { 343struct inode_operations {
344 int (*create) (struct inode *,struct dentry *,int, struct nameidata *); 344 int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *);
345 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 345 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
346 int (*link) (struct dentry *,struct inode *,struct dentry *); 346 int (*link) (struct dentry *,struct inode *,struct dentry *);
347 int (*unlink) (struct inode *,struct dentry *); 347 int (*unlink) (struct inode *,struct dentry *);
348 int (*symlink) (struct inode *,struct dentry *,const char *); 348 int (*symlink) (struct inode *,struct dentry *,const char *);
349 int (*mkdir) (struct inode *,struct dentry *,int); 349 int (*mkdir) (struct inode *,struct dentry *,umode_t);
350 int (*rmdir) (struct inode *,struct dentry *); 350 int (*rmdir) (struct inode *,struct dentry *);
351 int (*mknod) (struct inode *,struct dentry *,int,dev_t); 351 int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
352 int (*rename) (struct inode *, struct dentry *, 352 int (*rename) (struct inode *, struct dentry *,
353 struct inode *, struct dentry *); 353 struct inode *, struct dentry *);
354 int (*readlink) (struct dentry *, char __user *,int); 354 int (*readlink) (struct dentry *, char __user *,int);
diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus
index 15ac911ce51b..d28b591753d1 100644
--- a/Documentation/hwmon/pmbus
+++ b/Documentation/hwmon/pmbus
@@ -2,9 +2,8 @@ Kernel driver pmbus
2==================== 2====================
3 3
4Supported chips: 4Supported chips:
5 * Ericsson BMR45X series 5 * Ericsson BMR453, BMR454
6 DC/DC Converter 6 Prefixes: 'bmr453', 'bmr454'
7 Prefixes: 'bmr450', 'bmr451', 'bmr453', 'bmr454'
8 Addresses scanned: - 7 Addresses scanned: -
9 Datasheet: 8 Datasheet:
10 http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395 9 http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395
diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100
index 7617798b5c97..51f76a189fee 100644
--- a/Documentation/hwmon/zl6100
+++ b/Documentation/hwmon/zl6100
@@ -6,6 +6,10 @@ Supported chips:
6 Prefix: 'zl2004' 6 Prefix: 'zl2004'
7 Addresses scanned: - 7 Addresses scanned: -
8 Datasheet: http://www.intersil.com/data/fn/fn6847.pdf 8 Datasheet: http://www.intersil.com/data/fn/fn6847.pdf
9 * Intersil / Zilker Labs ZL2005
10 Prefix: 'zl2005'
11 Addresses scanned: -
12 Datasheet: http://www.intersil.com/data/fn/fn6848.pdf
9 * Intersil / Zilker Labs ZL2006 13 * Intersil / Zilker Labs ZL2006
10 Prefix: 'zl2006' 14 Prefix: 'zl2006'
11 Addresses scanned: - 15 Addresses scanned: -
@@ -30,6 +34,17 @@ Supported chips:
30 Prefix: 'zl6105' 34 Prefix: 'zl6105'
31 Addresses scanned: - 35 Addresses scanned: -
32 Datasheet: http://www.intersil.com/data/fn/fn6906.pdf 36 Datasheet: http://www.intersil.com/data/fn/fn6906.pdf
37 * Ericsson BMR450, BMR451
38 Prefixes: 'bmr450', 'bmr451'
39 Addresses scanned: -
40 Datasheet:
41http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401
42 * Ericsson BMR462, BMR463, BMR464
43 Prefixes: 'bmr462', 'bmr463', 'bmr464'
44 Addresses scanned: -
45 Datasheet:
46http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256
47
33 48
34Author: Guenter Roeck <guenter.roeck@ericsson.com> 49Author: Guenter Roeck <guenter.roeck@ericsson.com>
35 50
diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt
new file mode 100644
index 000000000000..f274c28b5103
--- /dev/null
+++ b/Documentation/input/alps.txt
@@ -0,0 +1,188 @@
1ALPS Touchpad Protocol
2----------------------
3
4Introduction
5------------
6
7Currently the ALPS touchpad driver supports four protocol versions in use by
8ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various
9protocol versions is contained in the following sections.
10
11Detection
12---------
13
14All ALPS touchpads should respond to the "E6 report" command sequence:
15E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or
1600-00-64.
17
18If the E6 report is successful, the touchpad model is identified using the "E7
19report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is
20matched against known models in the alps_model_data_array.
21
22With protocol versions 3 and 4, the E7 report model signature is always
2373-02-64. To differentiate between these versions, the response from the
24"Enter Command Mode" sequence must be inspected as described below.
25
26Command Mode
27------------
28
29Protocol versions 3 and 4 have a command mode that is used to read and write
30one-byte device registers in a 16-bit address space. The command sequence
31EC-EC-EC-E9 places the device in command mode, and the device will respond
32with 88-07 followed by a third byte. This third byte can be used to determine
33whether the device uses the version 3 or 4 protocol.
34
35To exit command mode, PSMOUSE_CMD_SETSTREAM (EA) is sent to the touchpad.
36
37While in command mode, register addresses can be set by first sending a
38specific command, either EC for v3 devices or F5 for v4 devices. Then the
39address is sent one nibble at a time, where each nibble is encoded as a
40command with optional data. This encoding differs slightly between the v3 and
41v4 protocols.
42
43Once an address has been set, the addressed register can be read by sending
44PSMOUSE_CMD_GETINFO (E9). The first two bytes of the response contain the
45address of the register being read, and the third contains the value of the
46register. Registers are written by writing the value one nibble at a time
47using the same encoding used for addresses.
48
49Packet Format
50-------------
51
52In the following tables, the following notation is used.
53
54 CAPITALS = stick, minuscules = touchpad
55
56?'s can have different meanings on different models, such as wheel rotation,
57extra buttons, stick buttons on a dualpoint, etc.
58
59PS/2 packet format
60------------------
61
62 byte 0: 0 0 YSGN XSGN 1 M R L
63 byte 1: X7 X6 X5 X4 X3 X2 X1 X0
64 byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
65
66Note that the device never signals overflow condition.
67
68ALPS Absolute Mode - Protocol Version 1
69---------------------------------------
70
71 byte 0: 1 0 0 0 1 x9 x8 x7
72 byte 1: 0 x6 x5 x4 x3 x2 x1 x0
73 byte 2: 0 ? ? l r ? fin ges
74 byte 3: 0 ? ? ? ? y9 y8 y7
75 byte 4: 0 y6 y5 y4 y3 y2 y1 y0
76 byte 5: 0 z6 z5 z4 z3 z2 z1 z0
77
78ALPS Absolute Mode - Protocol Version 2
79---------------------------------------
80
81 byte 0: 1 ? ? ? 1 ? ? ?
82 byte 1: 0 x6 x5 x4 x3 x2 x1 x0
83 byte 2: 0 x10 x9 x8 x7 ? fin ges
84 byte 3: 0 y9 y8 y7 1 M R L
85 byte 4: 0 y6 y5 y4 y3 y2 y1 y0
86 byte 5: 0 z6 z5 z4 z3 z2 z1 z0
87
88Dualpoint device -- interleaved packet format
89---------------------------------------------
90
91 byte 0: 1 1 0 0 1 1 1 1
92 byte 1: 0 x6 x5 x4 x3 x2 x1 x0
93 byte 2: 0 x10 x9 x8 x7 0 fin ges
94 byte 3: 0 0 YSGN XSGN 1 1 1 1
95 byte 4: X7 X6 X5 X4 X3 X2 X1 X0
96 byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
97 byte 6: 0 y9 y8 y7 1 m r l
98 byte 7: 0 y6 y5 y4 y3 y2 y1 y0
99 byte 8: 0 z6 z5 z4 z3 z2 z1 z0
100
101ALPS Absolute Mode - Protocol Version 3
102---------------------------------------
103
104ALPS protocol version 3 has three different packet formats. The first two are
105associated with touchpad events, and the third is associated with trackstick
106events.
107
108The first type is the touchpad position packet.
109
110 byte 0: 1 ? x1 x0 1 1 1 1
111 byte 1: 0 x10 x9 x8 x7 x6 x5 x4
112 byte 2: 0 y10 y9 y8 y7 y6 y5 y4
113 byte 3: 0 M R L 1 m r l
114 byte 4: 0 mt x3 x2 y3 y2 y1 y0
115 byte 5: 0 z6 z5 z4 z3 z2 z1 z0
116
117Note that for some devices the trackstick buttons are reported in this packet,
118and on others they are reported in the trackstick packets.
119
120The second packet type contains bitmaps representing the x and y axes. In the
121bitmaps a given bit is set if there is a finger covering that position on the
122given axis. Thus the bitmap packet can be used for low-resolution multi-touch
123data, although finger tracking is not possible. This packet also encodes the
124number of contacts (f1 and f0 in the table below).
125
126 byte 0: 1 1 x1 x0 1 1 1 1
127 byte 1: 0 x8 x7 x6 x5 x4 x3 x2
128 byte 2: 0 y7 y6 y5 y4 y3 y2 y1
129 byte 3: 0 y10 y9 y8 1 1 1 1
130 byte 4: 0 x14 x13 x12 x11 x10 x9 y0
131 byte 5: 0 1 ? ? ? ? f1 f0
132
133This packet only appears after a position packet with the mt bit set, and
134usually only appears when there are two or more contacts (although
135occasionally it's seen with only a single contact).
136
137The final v3 packet type is the trackstick packet.
138
139 byte 0: 1 1 x7 y7 1 1 1 1
140 byte 1: 0 x6 x5 x4 x3 x2 x1 x0
141 byte 2: 0 y6 y5 y4 y3 y2 y1 y0
142 byte 3: 0 1 0 0 1 0 0 0
143 byte 4: 0 z4 z3 z2 z1 z0 ? ?
144 byte 5: 0 0 1 1 1 1 1 1
145
146ALPS Absolute Mode - Protocol Version 4
147---------------------------------------
148
149Protocol version 4 has an 8-byte packet format.
150
151 byte 0: 1 ? x1 x0 1 1 1 1
152 byte 1: 0 x10 x9 x8 x7 x6 x5 x4
153 byte 2: 0 y10 y9 y8 y7 y6 y5 y4
154 byte 3: 0 1 x3 x2 y3 y2 y1 y0
155 byte 4: 0 ? ? ? 1 ? r l
156 byte 5: 0 z6 z5 z4 z3 z2 z1 z0
157 byte 6: bitmap data (described below)
158 byte 7: bitmap data (described below)
159
160The last two bytes represent a partial bitmap packet, with 3 full packets
161required to construct a complete bitmap packet. Once assembled, the 6-byte
162bitmap packet has the following format:
163
164 byte 0: 0 1 x7 x6 x5 x4 x3 x2
165 byte 1: 0 x1 x0 y4 y3 y2 y1 y0
166 byte 2: 0 0 ? x14 x13 x12 x11 x10
167 byte 3: 0 x9 x8 y9 y8 y7 y6 y5
168 byte 4: 0 0 0 0 0 0 0 0
169 byte 5: 0 0 0 0 0 0 0 y10
170
171There are several things worth noting here.
172
173 1) In the bitmap data, bit 6 of byte 0 serves as a sync bit to
174 identify the first fragment of a bitmap packet.
175
176 2) The bitmaps represent the same data as in the v3 bitmap packets, although
177 the packet layout is different.
178
179 3) There doesn't seem to be a count of the contact points anywhere in the v4
180 protocol packets. Deriving a count of contact points must be done by
181 analyzing the bitmaps.
182
183 4) There is a 3 to 1 ratio of position packets to bitmap packets. Therefore
184 MT position can only be updated for every third ST position update, and
185 the count of contact points can only be updated every third packet as
186 well.
187
188So far no v4 devices with tracksticks have been encountered.
diff --git a/Documentation/input/gpio-tilt.txt b/Documentation/input/gpio-tilt.txt
new file mode 100644
index 000000000000..06d60c3ff5e7
--- /dev/null
+++ b/Documentation/input/gpio-tilt.txt
@@ -0,0 +1,103 @@
1Driver for tilt-switches connected via GPIOs
2============================================
3
4Generic driver to read data from tilt switches connected via gpios.
5Orientation can be provided by one or more than one tilt switches,
6i.e. each tilt switch providing one axis, and the number of axes
7is also not limited.
8
9
10Data structures:
11----------------
12
13The array of struct gpio in the gpios field is used to list the gpios
14that represent the current tilt state.
15
16The array of struct gpio_tilt_axis describes the axes that are reported
17to the input system. The values set therein are used for the
18input_set_abs_params calls needed to init the axes.
19
20The array of struct gpio_tilt_state maps gpio states to the corresponding
21values to report. The gpio state is represented as a bitfield where the
22bit-index corresponds to the index of the gpio in the struct gpio array.
23In the same manner the values stored in the axes array correspond to
24the elements of the gpio_tilt_axis-array.
25
26
27Example:
28--------
29
30Example configuration for a single TS1003 tilt switch that rotates around
31one axis in 4 steps and emits the current tilt via two GPIOs.
32
33static int sg060_tilt_enable(struct device *dev) {
34 /* code to enable the sensors */
35};
36
37static void sg060_tilt_disable(struct device *dev) {
38 /* code to disable the sensors */
39};
40
41static struct gpio sg060_tilt_gpios[] = {
42 { SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" },
43 { SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" },
44};
45
46static struct gpio_tilt_state sg060_tilt_states[] = {
47 {
48 .gpios = (0 << 1) | (0 << 0),
49 .axes = (int[]) {
50 0,
51 },
52 }, {
53 .gpios = (0 << 1) | (1 << 0),
54 .axes = (int[]) {
55 1, /* 90 degrees */
56 },
57 }, {
58 .gpios = (1 << 1) | (1 << 0),
59 .axes = (int[]) {
60 2, /* 180 degrees */
61 },
62 }, {
63 .gpios = (1 << 1) | (0 << 0),
64 .axes = (int[]) {
65 3, /* 270 degrees */
66 },
67 },
68};
69
70static struct gpio_tilt_axis sg060_tilt_axes[] = {
71 {
72 .axis = ABS_RY,
73 .min = 0,
74 .max = 3,
75 .fuzz = 0,
76 .flat = 0,
77 },
78};
79
80static struct gpio_tilt_platform_data sg060_tilt_pdata= {
81 .gpios = sg060_tilt_gpios,
82 .nr_gpios = ARRAY_SIZE(sg060_tilt_gpios),
83
84 .axes = sg060_tilt_axes,
85 .nr_axes = ARRAY_SIZE(sg060_tilt_axes),
86
87 .states = sg060_tilt_states,
88 .nr_states = ARRAY_SIZE(sg060_tilt_states),
89
90 .debounce_interval = 100,
91
92 .poll_interval = 1000,
93 .enable = sg060_tilt_enable,
94 .disable = sg060_tilt_disable,
95};
96
97static struct platform_device sg060_device_tilt = {
98 .name = "gpio-tilt-polled",
99 .id = -1,
100 .dev = {
101 .platform_data = &sg060_tilt_pdata,
102 },
103};
diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt
index b2ef125b71f8..89251e2a3eba 100644
--- a/Documentation/input/sentelic.txt
+++ b/Documentation/input/sentelic.txt
@@ -1,5 +1,5 @@
1Copyright (C) 2002-2010 Sentelic Corporation. 1Copyright (C) 2002-2011 Sentelic Corporation.
2Last update: Jan-13-2010 2Last update: Dec-07-2011
3 3
4============================================================================== 4==============================================================================
5* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) 5* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons)
@@ -140,6 +140,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
140Byte 1: Bit7~Bit6 => 00, Normal data packet 140Byte 1: Bit7~Bit6 => 00, Normal data packet
141 => 01, Absolute coordination packet 141 => 01, Absolute coordination packet
142 => 10, Notify packet 142 => 10, Notify packet
143 => 11, Normal data packet with on-pad click
143 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. 144 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
144 When both fingers are up, the last two reports have zero valid 145 When both fingers are up, the last two reports have zero valid
145 bit. 146 bit.
@@ -164,6 +165,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
164Byte 1: Bit7~Bit6 => 00, Normal data packet 165Byte 1: Bit7~Bit6 => 00, Normal data packet
165 => 01, Absolute coordinates packet 166 => 01, Absolute coordinates packet
166 => 10, Notify packet 167 => 10, Notify packet
168 => 11, Normal data packet with on-pad click
167 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. 169 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
168 When both fingers are up, the last two reports have zero valid 170 When both fingers are up, the last two reports have zero valid
169 bit. 171 bit.
@@ -188,6 +190,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
188Byte 1: Bit7~Bit6 => 00, Normal data packet 190Byte 1: Bit7~Bit6 => 00, Normal data packet
189 => 01, Absolute coordinates packet 191 => 01, Absolute coordinates packet
190 => 10, Notify packet 192 => 10, Notify packet
193 => 11, Normal data packet with on-pad click
191 Bit5 => 1 194 Bit5 => 1
192 Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): 195 Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1):
193 0: left button is generated by the on-pad command 196 0: left button is generated by the on-pad command
@@ -205,7 +208,7 @@ Byte 4: Bit7 => scroll right button
205 Bit6 => scroll left button 208 Bit6 => scroll left button
206 Bit5 => scroll down button 209 Bit5 => scroll down button
207 Bit4 => scroll up button 210 Bit4 => scroll up button
208 * Note that if gesture and additional buttoni (Bit4~Bit7) 211 * Note that if gesture and additional button (Bit4~Bit7)
209 happen at the same time, the button information will not 212 happen at the same time, the button information will not
210 be sent. 213 be sent.
211 Bit3~Bit0 => Reserved 214 Bit3~Bit0 => Reserved
@@ -227,6 +230,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
227Byte 1: Bit7~Bit6 => 00, Normal data packet 230Byte 1: Bit7~Bit6 => 00, Normal data packet
228 => 01, Absolute coordinates packet 231 => 01, Absolute coordinates packet
229 => 10, Notify packet 232 => 10, Notify packet
233 => 11, Normal data packet with on-pad click
230 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. 234 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
231 When both fingers are up, the last two reports have zero valid 235 When both fingers are up, the last two reports have zero valid
232 bit. 236 bit.
@@ -253,6 +257,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
253Byte 1: Bit7~Bit6 => 00, Normal data packet 257Byte 1: Bit7~Bit6 => 00, Normal data packet
254 => 01, Absolute coordination packet 258 => 01, Absolute coordination packet
255 => 10, Notify packet 259 => 10, Notify packet
260 => 11, Normal data packet with on-pad click
256 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. 261 Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
257 When both fingers are up, the last two reports have zero valid 262 When both fingers are up, the last two reports have zero valid
258 bit. 263 bit.
@@ -279,8 +284,9 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------
279Byte 1: Bit7~Bit6 => 00, Normal data packet 284Byte 1: Bit7~Bit6 => 00, Normal data packet
280 => 01, Absolute coordination packet 285 => 01, Absolute coordination packet
281 => 10, Notify packet 286 => 10, Notify packet
287 => 11, Normal data packet with on-pad click
282 Bit5 => 1 288 Bit5 => 1
283 Bit4 => when in absolute coordinate mode (valid when EN_PKT_GO is 1): 289 Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1):
284 0: left button is generated by the on-pad command 290 0: left button is generated by the on-pad command
285 1: left button is generated by the external button 291 1: left button is generated by the external button
286 Bit3 => 1 292 Bit3 => 1
@@ -307,6 +313,110 @@ Sample sequence of Multi-finger, Multi-coordinate mode:
307 abs pkt 2, ..., notify packet (valid bit == 0) 313 abs pkt 2, ..., notify packet (valid bit == 0)
308 314
309============================================================================== 315==============================================================================
316* Absolute position for STL3888-Cx and STL3888-Dx.
317==============================================================================
318Single Finger, Absolute Coordinate Mode (SFAC)
319 Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
320BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
321 1 |0|1|0|P|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y|
322 |---------------| |---------------| |---------------| |---------------|
323
324Byte 1: Bit7~Bit6 => 00, Normal data packet
325 => 01, Absolute coordinates packet
326 => 10, Notify packet
327 Bit5 => Coordinate mode(always 0 in SFAC mode):
328 0: single-finger absolute coordinates (SFAC) mode
329 1: multi-finger, multiple coordinates (MFMC) mode
330 Bit4 => 0: The LEFT button is generated by on-pad command (OPC)
331 1: The LEFT button is generated by external button
332 Default is 1 even if the LEFT button is not pressed.
333 Bit3 => Always 1, as specified by PS/2 protocol.
334 Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
335 Bit1 => Right Button, 1 is pressed, 0 is not pressed.
336 Bit0 => Left Button, 1 is pressed, 0 is not pressed.
337Byte 2: X coordinate (xpos[9:2])
338Byte 3: Y coordinate (ypos[9:2])
339Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0])
340 Bit3~Bit2 => X coordinate (xpos[1:0])
341 Bit4 => 4th mouse button(forward one page)
342 Bit5 => 5th mouse button(backward one page)
343 Bit6 => scroll left button
344 Bit7 => scroll right button
345
346Multi Finger, Multiple Coordinates Mode (MFMC):
347 Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
348BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
349 1 |0|1|1|P|1|F|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y|
350 |---------------| |---------------| |---------------| |---------------|
351
352Byte 1: Bit7~Bit6 => 00, Normal data packet
353 => 01, Absolute coordinates packet
354 => 10, Notify packet
355 Bit5 => Coordinate mode (always 1 in MFMC mode):
356 0: single-finger absolute coordinates (SFAC) mode
357 1: multi-finger, multiple coordinates (MFMC) mode
358 Bit4 => 0: The LEFT button is generated by on-pad command (OPC)
359 1: The LEFT button is generated by external button
360 Default is 1 even if the LEFT button is not pressed.
361 Bit3 => Always 1, as specified by PS/2 protocol.
362 Bit2 => Finger index, 0 is the first finger, 1 is the second finger.
363 If bit 1 and 0 are all 1 and bit 4 is 0, the middle external
364 button is pressed.
365 Bit1 => Right Button, 1 is pressed, 0 is not pressed.
366 Bit0 => Left Button, 1 is pressed, 0 is not pressed.
367Byte 2: X coordinate (xpos[9:2])
368Byte 3: Y coordinate (ypos[9:2])
369Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0])
370 Bit3~Bit2 => X coordinate (xpos[1:0])
371 Bit4 => 4th mouse button(forward one page)
372 Bit5 => 5th mouse button(backward one page)
373 Bit6 => scroll left button
374 Bit7 => scroll right button
375
376 When one of the two fingers is up, the device will output four consecutive
377MFMC#0 report packets with zero X and Y to represent that the 1st finger is up or
378four consecutive MFMC#1 report packets with zero X and Y to represent that
379the 2nd finger is up. On the other hand, if both fingers are up, the device
380will output four consecutive single-finger, absolute coordinate(SFAC) packets
381with zero X and Y.
382
383Notify Packet for STL3888-Cx/Dx
384 Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
385BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
386 1 |1|0|0|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0|
387 |---------------| |---------------| |---------------| |---------------|
388
389Byte 1: Bit7~Bit6 => 00, Normal data packet
390 => 01, Absolute coordinates packet
391 => 10, Notify packet
392 Bit5 => Always 0
393 Bit4 => 0: The LEFT button is generated by on-pad command(OPC)
394 1: The LEFT button is generated by external button
395 Default is 1 even if the LEFT button is not pressed.
396 Bit3 => 1
397 Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
398 Bit1 => Right Button, 1 is pressed, 0 is not pressed.
399 Bit0 => Left Button, 1 is pressed, 0 is not pressed.
400Byte 2: Message type:
401 0xba => gesture information
402 0xc0 => one finger hold-rotating gesture
403Byte 3: The first parameter for the received message:
404 0xba => gesture ID (refer to the 'Gesture ID' section)
405 0xc0 => region ID
406Byte 4: The second parameter for the received message:
407 0xba => N/A
408 0xc0 => finger up/down information
409
410Sample sequence of Multi-finger, Multi-coordinates mode:
411
412 notify packet (valid bit == 1), MFMC packet 1 (byte 1, bit 2 == 0),
413 MFMC packet 2 (byte 1, bit 2 == 1), MFMC packet 1, MFMC packet 2,
414 ..., notify packet (valid bit == 0)
415
416 That is, when the device is in MFMC mode, the host will receive
417 interleaved absolute coordinate packets for each finger.
418
419==============================================================================
310* FSP Enable/Disable packet 420* FSP Enable/Disable packet
311============================================================================== 421==============================================================================
312 Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 422 Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
@@ -348,9 +458,10 @@ http://www.computer-engineering.org/ps2mouse/
348============================================================================== 458==============================================================================
3491. Identify FSP by reading device ID(0x00) and version(0x01) register 4591. Identify FSP by reading device ID(0x00) and version(0x01) register
350 460
3512. Determine number of buttons by reading status2 (0x0b) register 4612a. For FSP version < STL3888 Cx, determine number of buttons by reading
462 the 'test mode status' (0x20) register:
352 463
353 buttons = reg[0x0b] & 0x30 464 buttons = reg[0x20] & 0x30
354 465
355 if buttons == 0x30 or buttons == 0x20: 466 if buttons == 0x30 or buttons == 0x20:
356 # two/four buttons 467 # two/four buttons
@@ -365,6 +476,10 @@ http://www.computer-engineering.org/ps2mouse/
365 Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' 476 Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
366 section A for packet parsing detail 477 section A for packet parsing detail
367 478
4792b. For FSP version >= STL3888 Cx:
480 Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
481 section A for packet parsing detail (ignore byte 4, bit ~ 7)
482
368============================================================================== 483==============================================================================
369* Programming Sequence for Register Reading/Writing 484* Programming Sequence for Register Reading/Writing
370============================================================================== 485==============================================================================
@@ -374,7 +489,7 @@ Register inversion requirement:
374 Following values needed to be inverted(the '~' operator in C) before being 489 Following values needed to be inverted(the '~' operator in C) before being
375sent to FSP: 490sent to FSP:
376 491
377 0xe9, 0xee, 0xf2 and 0xff. 492 0xe8, 0xe9, 0xee, 0xf2, 0xf3 and 0xff.
378 493
379Register swapping requirement: 494Register swapping requirement:
380 495
@@ -415,7 +530,18 @@ Register reading sequence:
415 530
416 8. send 0xe9(status request) PS/2 command to FSP; 531 8. send 0xe9(status request) PS/2 command to FSP;
417 532
418 9. the response read from FSP should be the requested register value. 533 9. the 4th byte of the response read from FSP should be the
534 requested register value(?? indicates don't care byte):
535
536 host: 0xe9
537 3888: 0xfa (??) (??) (val)
538
539 * Note that since the Cx release, the hardware will return 1's
540 complement of the register value at the 3rd byte of status request
541 result:
542
543 host: 0xe9
544 3888: 0xfa (??) (~val) (val)
419 545
420Register writing sequence: 546Register writing sequence:
421 547
@@ -465,71 +591,194 @@ Register writing sequence:
465 591
466 9. the register writing sequence is completed. 592 9. the register writing sequence is completed.
467 593
594 * Note that since the Cx release, the hardware will return 1's
595 complement of the register value at the 3rd byte of status request
596 result. Host can optionally send another 0xe9 (status request) PS/2
597 command to FSP at the end of register writing to verify that the
598 register writing operation is successful (?? indicates don't care
599 byte):
600
601 host: 0xe9
602 3888: 0xfa (??) (~val) (val)
603
604==============================================================================
605* Programming Sequence for Page Register Reading/Writing
606==============================================================================
607
608 In order to overcome the limitation of maximum number of registers
609supported, the hardware separates register into different groups called
610'pages.' Each page is able to include up to 255 registers.
611
612 The default page after power up is 0x82; therefore, if one has to get
613access to register 0x8301, one has to use the following sequence to switch
614to page 0x83, then start reading/writing from/to offset 0x01 by using
615the register read/write sequence described in previous section.
616
617Page register reading sequence:
618
619 1. send 0xf3 PS/2 command to FSP;
620
621 2. send 0x66 PS/2 command to FSP;
622
623 3. send 0x88 PS/2 command to FSP;
624
625 4. send 0xf3 PS/2 command to FSP;
626
627 5. send 0x83 PS/2 command to FSP;
628
629 6. send 0x88 PS/2 command to FSP;
630
631 7. send 0xe9(status request) PS/2 command to FSP;
632
633 8. the response read from FSP should be the requested page value.
634
635Page register writing sequence:
636
637 1. send 0xf3 PS/2 command to FSP;
638
639 2. send 0x38 PS/2 command to FSP;
640
641 3. send 0x88 PS/2 command to FSP;
642
643 4. send 0xf3 PS/2 command to FSP;
644
645 5. if the page address being written is not required to be
646 inverted(refer to the 'Register inversion requirement' section),
647 goto step 6
648
649 5a. send 0x47 PS/2 command to FSP;
650
651 5b. send the inverted page address to FSP and goto step 9;
652
653 6. if the page address being written is not required to be
654 swapped(refer to the 'Register swapping requirement' section),
655 goto step 7
656
657 6a. send 0x44 PS/2 command to FSP;
658
659 6b. send the swapped page address to FSP and goto step 9;
660
661 7. send 0x33 PS/2 command to FSP;
662
663 8. send the page address to FSP;
664
665 9. the page register writing sequence is completed.
666
667==============================================================================
668* Gesture ID
669==============================================================================
670
671 Unlike other devices which send multiple fingers' coordinates to host,
672FSP processes multiple fingers' coordinates internally and converts them
673into an 8-bit integer, namely 'Gesture ID.' Following is a list of
674supported gesture IDs:
675
676 ID Description
677 0x86 2 finger straight up
678 0x82 2 finger straight down
679 0x80 2 finger straight right
680 0x84 2 finger straight left
681 0x8f 2 finger zoom in
682 0x8b 2 finger zoom out
683 0xc0 2 finger curve, counter clockwise
684 0xc4 2 finger curve, clockwise
685 0x2e 3 finger straight up
686 0x2a 3 finger straight down
687 0x28 3 finger straight right
688 0x2c 3 finger straight left
689 0x38 palm
690
468============================================================================== 691==============================================================================
469* Register Listing 692* Register Listing
470============================================================================== 693==============================================================================
471 694
695 Registers are represented as 16-bit values. The higher 8 bits represent
696the page address and the lower 8 bits represent the relative offset within
697that particular page. Refer to the 'Programming Sequence for Page Register
698Reading/Writing' section for instructions on how to change current page
699address.
700
472offset width default r/w name 701offset width default r/w name
4730x00 bit7~bit0 0x01 RO device ID 7020x8200 bit7~bit0 0x01 RO device ID
474 703
4750x01 bit7~bit0 0xc0 RW version ID 7040x8201 bit7~bit0 RW version ID
705 0xc1: STL3888 Ax
706 0xd0 ~ 0xd2: STL3888 Bx
707 0xe0 ~ 0xe1: STL3888 Cx
708 0xe2 ~ 0xe3: STL3888 Dx
476 709
4770x02 bit7~bit0 0x01 RO vendor ID 7100x8202 bit7~bit0 0x01 RO vendor ID
478 711
4790x03 bit7~bit0 0x01 RO product ID 7120x8203 bit7~bit0 0x01 RO product ID
480 713
4810x04 bit3~bit0 0x01 RW revision ID 7140x8204 bit3~bit0 0x01 RW revision ID
482 715
4830x0b RO test mode status 1 7160x820b test mode status 1
484 bit3 1 RO 0: rotate 180 degree, 1: no rotation 717 bit3 1 RO 0: rotate 180 degree
718 1: no rotation
719 *only supported by H/W prior to Cx
485 720
486 bit5~bit4 RO number of buttons 7210x820f register file page control
487 11 => 2, lbtn/rbtn 722 bit2 0 RW 1: rotate 180 degree
488 10 => 4, lbtn/rbtn/scru/scrd 723 0: no rotation
489 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr 724 *supported since Cx
490 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn
491 725
4920x0f RW register file page control
493 bit0 0 RW 1 to enable page 1 register files 726 bit0 0 RW 1 to enable page 1 register files
727 *only supported by H/W prior to Cx
494 728
4950x10 RW system control 1 7290x8210 RW system control 1
496 bit0 1 RW Reserved, must be 1 730 bit0 1 RW Reserved, must be 1
497 bit1 0 RW Reserved, must be 0 731 bit1 0 RW Reserved, must be 0
498 bit4 1 RW Reserved, must be 0 732 bit4 0 RW Reserved, must be 0
499 bit5 0 RW register clock gating enable 733 bit5 1 RW register clock gating enable
500 0: read only, 1: read/write enable 734 0: read only, 1: read/write enable
501 (Note that following registers does not require clock gating being 735 (Note that following registers does not require clock gating being
502 enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e 736 enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e
503 40 41 42 43. In addition to that, this bit must be 1 when gesture 737 40 41 42 43. In addition to that, this bit must be 1 when gesture
504 mode is enabled) 738 mode is enabled)
505 739
5060x31 RW on-pad command detection 7400x8220 test mode status
741 bit5~bit4 RO number of buttons
742 11 => 2, lbtn/rbtn
743 10 => 4, lbtn/rbtn/scru/scrd
744 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr
745 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn
746 *only supported by H/W prior to Cx
747
7480x8231 RW on-pad command detection
507 bit7 0 RW on-pad command left button down tag 749 bit7 0 RW on-pad command left button down tag
508 enable 750 enable
509 0: disable, 1: enable 751 0: disable, 1: enable
752 *only supported by H/W prior to Cx
510 753
5110x34 RW on-pad command control 5 7540x8234 RW on-pad command control 5
512 bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 755 bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5
513 (Note that position unit is in 0.5 scanline) 756 (Note that position unit is in 0.5 scanline)
757 *only supported by H/W prior to Cx
514 758
515 bit7 0 RW on-pad tap zone enable 759 bit7 0 RW on-pad tap zone enable
516 0: disable, 1: enable 760 0: disable, 1: enable
761 *only supported by H/W prior to Cx
517 762
5180x35 RW on-pad command control 6 7630x8235 RW on-pad command control 6
519 bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 764 bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5
520 (Note that position unit is in 0.5 scanline) 765 (Note that position unit is in 0.5 scanline)
766 *only supported by H/W prior to Cx
521 767
5220x36 RW on-pad command control 7 7680x8236 RW on-pad command control 7
523 bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 769 bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5
524 (Note that position unit is in 0.5 scanline) 770 (Note that position unit is in 0.5 scanline)
771 *only supported by H/W prior to Cx
525 772
5260x37 RW on-pad command control 8 7730x8237 RW on-pad command control 8
527 bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 774 bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5
528 (Note that position unit is in 0.5 scanline) 775 (Note that position unit is in 0.5 scanline)
776 *only supported by H/W prior to Cx
529 777
5300x40 RW system control 5 7780x8240 RW system control 5
531 bit1 0 RW FSP Intellimouse mode enable 779 bit1 0 RW FSP Intellimouse mode enable
532 0: disable, 1: enable 780 0: disable, 1: enable
781 *only supported by H/W prior to Cx
533 782
534 bit2 0 RW movement + abs. coordinate mode enable 783 bit2 0 RW movement + abs. coordinate mode enable
535 0: disable, 1: enable 784 0: disable, 1: enable
@@ -537,6 +786,7 @@ offset width default r/w name
537 bit 1 is not set. However, the format is different from that of bit 1. 786 bit 1 is not set. However, the format is different from that of bit 1.
538 In addition, when bit 1 and bit 2 are set at the same time, bit 2 will 787 In addition, when bit 1 and bit 2 are set at the same time, bit 2 will
539 override bit 1.) 788 override bit 1.)
789 *only supported by H/W prior to Cx
540 790
541 bit3 0 RW abs. coordinate only mode enable 791 bit3 0 RW abs. coordinate only mode enable
542 0: disable, 1: enable 792 0: disable, 1: enable
@@ -544,9 +794,11 @@ offset width default r/w name
544 bit 1 is not set. However, the format is different from that of bit 1. 794 bit 1 is not set. However, the format is different from that of bit 1.
545 In addition, when bit 1, bit 2 and bit 3 are set at the same time, 795 In addition, when bit 1, bit 2 and bit 3 are set at the same time,
546 bit 3 will override bit 1 and 2.) 796 bit 3 will override bit 1 and 2.)
797 *only supported by H/W prior to Cx
547 798
548 bit5 0 RW auto switch enable 799 bit5 0 RW auto switch enable
549 0: disable, 1: enable 800 0: disable, 1: enable
801 *only supported by H/W prior to Cx
550 802
551 bit6 0 RW G0 abs. + notify packet format enable 803 bit6 0 RW G0 abs. + notify packet format enable
552 0: disable, 1: enable 804 0: disable, 1: enable
@@ -554,18 +806,68 @@ offset width default r/w name
554 bit 2 and 3. That is, if any of those bit is 1, host will receive 806 bit 2 and 3. That is, if any of those bit is 1, host will receive
555 absolute coordinates; otherwise, host only receives packets with 807 absolute coordinates; otherwise, host only receives packets with
556 relative coordinate.) 808 relative coordinate.)
809 *only supported by H/W prior to Cx
557 810
558 bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd 811 bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd
559 finger packet enable 812 finger packet enable
560 0: disable, 1: enable 813 0: disable, 1: enable
814 *only supported by H/W prior to Cx
561 815
5620x43 RW on-pad control 8160x8243 RW on-pad control
563 bit0 0 RW on-pad control enable 817 bit0 0 RW on-pad control enable
564 0: disable, 1: enable 818 0: disable, 1: enable
565 (Note that if this bit is cleared, bit 3/5 will be ineffective) 819 (Note that if this bit is cleared, bit 3/5 will be ineffective)
820 *only supported by H/W prior to Cx
566 821
567 bit3 0 RW on-pad fix vertical scrolling enable 822 bit3 0 RW on-pad fix vertical scrolling enable
568 0: disable, 1: enable 823 0: disable, 1: enable
824 *only supported by H/W prior to Cx
569 825
570 bit5 0 RW on-pad fix horizontal scrolling enable 826 bit5 0 RW on-pad fix horizontal scrolling enable
571 0: disable, 1: enable 827 0: disable, 1: enable
828 *only supported by H/W prior to Cx
829
8300x8290 RW software control register 1
831 bit0 0 RW absolute coordination mode
832 0: disable, 1: enable
833 *supported since Cx
834
835 bit1 0 RW gesture ID output
836 0: disable, 1: enable
837 *supported since Cx
838
839 bit2 0 RW two fingers' coordinates output
840 0: disable, 1: enable
841 *supported since Cx
842
843 bit3 0 RW finger up one packet output
844 0: disable, 1: enable
845 *supported since Cx
846
847 bit4 0 RW absolute coordination continuous mode
848 0: disable, 1: enable
849 *supported since Cx
850
851 bit6~bit5 00 RW gesture group selection
852 00: basic
853 01: suite
854 10: suite pro
855 11: advanced
856 *supported since Cx
857
858 bit7 0 RW Bx packet output compatible mode
859 0: disable, 1: enable
860 *supported since Cx
861
862
8630x833d RW on-pad command control 1
864 bit7 1 RW on-pad command detection enable
865 0: disable, 1: enable
866 *supported since Cx
867
8680x833e RW on-pad command detection
869 bit7 0 RW on-pad command left button down tag
870 enable. Works only in H/W based PS/2
871 data packet mode.
872 0: disable, 1: enable
873 *supported since Cx
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 7a9e0b4b2903..506c7390c2b9 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -17,8 +17,8 @@ You can use common commands, such as cp and scp, to copy the
17memory image to a dump file on the local disk, or across the network to 17memory image to a dump file on the local disk, or across the network to
18a remote system. 18a remote system.
19 19
20Kdump and kexec are currently supported on the x86, x86_64, ppc64 and ia64 20Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
21architectures. 21and s390x architectures.
22 22
23When the system kernel boots, it reserves a small section of memory for 23When the system kernel boots, it reserves a small section of memory for
24the dump-capture kernel. This ensures that ongoing Direct Memory Access 24the dump-capture kernel. This ensures that ongoing Direct Memory Access
@@ -34,11 +34,18 @@ Similarly on PPC64 machines first 32KB of physical memory is needed for
34booting regardless of where the kernel is loaded and to support 64K page 34booting regardless of where the kernel is loaded and to support 64K page
35size kexec backs up the first 64KB memory. 35size kexec backs up the first 64KB memory.
36 36
37For s390x, when kdump is triggered, the crashkernel region is exchanged
38with the region [0, crashkernel region size] and then the kdump kernel
39runs in [0, crashkernel region size]. Therefore no relocatable kernel is
40needed for s390x.
41
37All of the necessary information about the system kernel's core image is 42All of the necessary information about the system kernel's core image is
38encoded in the ELF format, and stored in a reserved area of memory 43encoded in the ELF format, and stored in a reserved area of memory
39before a crash. The physical address of the start of the ELF header is 44before a crash. The physical address of the start of the ELF header is
40passed to the dump-capture kernel through the elfcorehdr= boot 45passed to the dump-capture kernel through the elfcorehdr= boot
41parameter. 46parameter. Optionally the size of the ELF header can also be passed
47when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
48
42 49
43With the dump-capture kernel, you can access the memory image, or "old 50With the dump-capture kernel, you can access the memory image, or "old
44memory," in two ways: 51memory," in two ways:
@@ -291,6 +298,10 @@ Boot into System Kernel
291 The region may be automatically placed on ia64, see the 298 The region may be automatically placed on ia64, see the
292 dump-capture kernel config option notes above. 299 dump-capture kernel config option notes above.
293 300
301 On s390x, typically use "crashkernel=xxM". The value of xx is dependent
302 on the memory consumption of the kdump system. In general this is not
303 dependent on the memory size of the production system.
304
294Load the Dump-capture Kernel 305Load the Dump-capture Kernel
295============================ 306============================
296 307
@@ -308,6 +319,8 @@ For ppc64:
308 - Use vmlinux 319 - Use vmlinux
309For ia64: 320For ia64:
310 - Use vmlinux or vmlinuz.gz 321 - Use vmlinux or vmlinuz.gz
322For s390x:
323 - Use image or bzImage
311 324
312 325
313If you are using a uncompressed vmlinux image then use following command 326If you are using a uncompressed vmlinux image then use following command
@@ -337,6 +350,8 @@ For i386, x86_64 and ia64:
337For ppc64: 350For ppc64:
338 "1 maxcpus=1 noirqdistrib reset_devices" 351 "1 maxcpus=1 noirqdistrib reset_devices"
339 352
353For s390x:
354 "1 maxcpus=1 cgroup_disable=memory"
340 355
341Notes on loading the dump-capture kernel: 356Notes on loading the dump-capture kernel:
342 357
@@ -362,6 +377,20 @@ Notes on loading the dump-capture kernel:
362 dump. Hence generally it is useful either to build a UP dump-capture 377 dump. Hence generally it is useful either to build a UP dump-capture
363 kernel or specify maxcpus=1 option while loading dump-capture kernel. 378 kernel or specify maxcpus=1 option while loading dump-capture kernel.
364 379
380* For s390x there are two kdump modes: If an ELF header is specified with
381 the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
382 is done on all other architectures. If no elfcorehdr= kernel parameter is
383 specified, the s390x kdump kernel dynamically creates the header. The
384 second mode has the advantage that for CPU and memory hotplug, kdump does
385 not have to be reloaded with kexec_load().
386
387* For s390x systems with many attached devices the "cio_ignore" kernel
388 parameter should be used for the kdump kernel in order to prevent allocation
389 of kernel memory for devices that are not relevant for kdump. The same
390 applies to systems that use SCSI/FCP devices. In that case the
391 "allow_lun_scan" zfcp module parameter should be set to zero before
392 setting FCP devices online.
393
365Kernel Panic 394Kernel Panic
366============ 395============
367 396
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 81c287fad79d..eb93fd0ec734 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
329 is a lot of faster 329 is a lot of faster
330 off - do not initialize any AMD IOMMU found in 330 off - do not initialize any AMD IOMMU found in
331 the system 331 the system
332 force_isolation - Force device isolation for all
333 devices. The IOMMU driver is not
334 allowed anymore to lift isolation
335 requirements as needed. This option
336 does not override iommu=pt
332 337
333 amijoy.map= [HW,JOY] Amiga joystick support 338 amijoy.map= [HW,JOY] Amiga joystick support
334 Map of devices attached to JOY0DAT and JOY1DAT 339 Map of devices attached to JOY0DAT and JOY1DAT
@@ -623,6 +628,25 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
623 no_debug_objects 628 no_debug_objects
624 [KNL] Disable object debugging 629 [KNL] Disable object debugging
625 630
631 debug_guardpage_minorder=
632 [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
633 parameter allows control of the order of pages that will
634 be intentionally kept free (and hence protected) by the
635 buddy allocator. Bigger values increase the probability
636 of catching random memory corruption, but reduce the
637 amount of memory for normal system use. The maximum
638 possible value is MAX_ORDER/2. Setting this parameter
639 to 1 or 2 should be enough to identify most random
640 memory corruption problems caused by bugs in kernel or
641 driver code when a CPU writes to (or reads from) a
642 random memory location. Note that there exists a class
643 of memory corruption problems caused by buggy H/W or
644 F/W or by drivers badly programming DMA (basically when
645 memory is written at bus level and the CPU MMU is
646 bypassed) which are not detectable by
647 CONFIG_DEBUG_PAGEALLOC, hence this option will not help
648 tracking down these problems.
649
626 debugpat [X86] Enable PAT debugging 650 debugpat [X86] Enable PAT debugging
627 651
628 decnet.addr= [HW,NET] 652 decnet.addr= [HW,NET]
@@ -1059,7 +1083,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1059 nomerge 1083 nomerge
1060 forcesac 1084 forcesac
1061 soft 1085 soft
1062 pt [x86, IA-64] 1086 pt [x86, IA-64]
1087 group_mf [x86, IA-64]
1088
1063 1089
1064 io7= [HW] IO7 for Marvel based alpha systems 1090 io7= [HW] IO7 for Marvel based alpha systems
1065 See comment before marvel_specify_io7 in 1091 See comment before marvel_specify_io7 in
@@ -1178,9 +1204,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1178 kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. 1204 kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
1179 Default is 0 (don't ignore, but inject #GP) 1205 Default is 0 (don't ignore, but inject #GP)
1180 1206
1181 kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
1182 Default is 1 (enabled)
1183
1184 kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit 1207 kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
1185 KVM MMU at runtime. 1208 KVM MMU at runtime.
1186 Default is 0 (off) 1209 Default is 0 (off)
@@ -1630,12 +1653,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1630 The default is to return 64-bit inode numbers. 1653 The default is to return 64-bit inode numbers.
1631 1654
1632 nfs.nfs4_disable_idmapping= 1655 nfs.nfs4_disable_idmapping=
1633 [NFSv4] When set, this option disables the NFSv4 1656 [NFSv4] When set to the default of '1', this option
1634 idmapper on the client, but only if the mount 1657 ensures that both the RPC level authentication
1635 is using the 'sec=sys' security flavour. This may 1658 scheme and the NFS level operations agree to use
1636 make migration from legacy NFSv2/v3 systems easier 1659 numeric uids/gids if the mount is using the
1637 provided that the server has the appropriate support. 1660 'sec=sys' security flavour. In effect it is
1638 The default is to always enable NFSv4 idmapping. 1661 disabling idmapping, which can make migration from
1662 legacy NFSv2/v3 systems to NFSv4 easier.
1663 Servers that do not support this mode of operation
1664 will be autodetected by the client, and it will fall
1665 back to using the idmapper.
1666 To turn off this behaviour, set the value to '0'.
1639 1667
1640 nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take 1668 nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take
1641 when a NMI is triggered. 1669 when a NMI is triggered.
@@ -1796,6 +1824,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1796 nomfgpt [X86-32] Disable Multi-Function General Purpose 1824 nomfgpt [X86-32] Disable Multi-Function General Purpose
1797 Timer usage (for AMD Geode machines). 1825 Timer usage (for AMD Geode machines).
1798 1826
1827 nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
1828 shut down the other CPUs. Instead use the REBOOT_VECTOR
1829 irq.
1830
1799 nopat [X86] Disable PAT (page attribute table extension of 1831 nopat [X86] Disable PAT (page attribute table extension of
1800 pagetables) support. 1832 pagetables) support.
1801 1833
@@ -1885,6 +1917,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1885 arch_perfmon: [X86] Force use of architectural 1917 arch_perfmon: [X86] Force use of architectural
1886 perfmon on Intel CPUs instead of the 1918 perfmon on Intel CPUs instead of the
1887 CPU specific event set. 1919 CPU specific event set.
1920 timer: [X86] Force use of architectural NMI
1921 timer mode (see also oprofile.timer
1922 for generic hr timer mode)
1923 [s390] Force legacy basic mode sampling
1924 (report cpu_type "timer")
1888 1925
1889 oops=panic Always panic on oopses. Default is to just kill the 1926 oops=panic Always panic on oopses. Default is to just kill the
1890 process, but there is a small probability of 1927 process, but there is a small probability of
@@ -2362,6 +2399,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2362 2399
2363 slram= [HW,MTD] 2400 slram= [HW,MTD]
2364 2401
2402 slab_max_order= [MM, SLAB]
2403 Determines the maximum allowed order for slabs.
2404 A high setting may cause OOMs due to memory
2405 fragmentation. Defaults to 1 for systems with
2406 more than 32MB of RAM, 0 otherwise.
2407
2365 slub_debug[=options[,slabs]] [MM, SLUB] 2408 slub_debug[=options[,slabs]] [MM, SLUB]
2366 Enabling slub_debug allows one to determine the 2409 Enabling slub_debug allows one to determine the
2367 culprit if slab objects become corrupted. Enabling 2410 culprit if slab objects become corrupted. Enabling
@@ -2632,6 +2675,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2632 [USB] Start with the old device initialization 2675 [USB] Start with the old device initialization
2633 scheme (default 0 = off). 2676 scheme (default 0 = off).
2634 2677
2678 usbcore.usbfs_memory_mb=
2679 [USB] Memory limit (in MB) for buffers allocated by
2680 usbfs (default = 16, 0 = max = 2047).
2681
2635 usbcore.use_both_schemes= 2682 usbcore.use_both_schemes=
2636 [USB] Try the other device initialization scheme 2683 [USB] Try the other device initialization scheme
2637 if the first one fails (default 1 = enabled). 2684 if the first one fails (default 1 = enabled).
@@ -2750,11 +2797,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2750 functions are at fixed addresses, they make nice 2797 functions are at fixed addresses, they make nice
2751 targets for exploits that can control RIP. 2798 targets for exploits that can control RIP.
2752 2799
2753 emulate Vsyscalls turn into traps and are emulated 2800 emulate [default] Vsyscalls turn into traps and are
2754 reasonably safely. 2801 emulated reasonably safely.
2755 2802
2756 native [default] Vsyscalls are native syscall 2803 native Vsyscalls are native syscall instructions.
2757 instructions.
2758 This is a little bit faster than trapping 2804 This is a little bit faster than trapping
2759 and makes a few dynamic recompilers work 2805 and makes a few dynamic recompilers work
2760 better than they would in emulation mode. 2806 better than they would in emulation mode.
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index abf768c681e2..5dbc99c04f6e 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -221,3 +221,66 @@ when the chain is validated for the first time, is then put into a hash
221table, which hash-table can be checked in a lockfree manner. If the 221table, which hash-table can be checked in a lockfree manner. If the
222locking chain occurs again later on, the hash table tells us that we 222locking chain occurs again later on, the hash table tells us that we
223dont have to validate the chain again. 223dont have to validate the chain again.
224
225Troubleshooting:
226----------------
227
228The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
229Exceeding this number will trigger the following lockdep warning:
230
231 (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
232
233By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
234desktop systems have fewer than 1,000 lock classes, so this warning
235normally results from lock-class leakage or failure to properly
236initialize locks. These two problems are illustrated below:
237
2381. Repeated module loading and unloading while running the validator
239 will result in lock-class leakage. The issue here is that each
240 load of the module will create a new set of lock classes for
241 that module's locks, but module unloading does not remove old
242 classes (see below discussion of reuse of lock classes for why).
243 Therefore, if that module is loaded and unloaded repeatedly,
244 the number of lock classes will eventually reach the maximum.
245
2462. Using structures such as arrays that have large numbers of
247 locks that are not explicitly initialized. For example,
248 a hash table with 8192 buckets where each bucket has its own
249 spinlock_t will consume 8192 lock classes -unless- each spinlock
250 is explicitly initialized at runtime, for example, using the
251 run-time spin_lock_init() as opposed to compile-time initializers
252 such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize
253 the per-bucket spinlocks would guarantee lock-class overflow.
254 In contrast, a loop that called spin_lock_init() on each lock
255 would place all 8192 locks into a single lock class.
256
257 The moral of this story is that you should always explicitly
258 initialize your locks.
259
260One might argue that the validator should be modified to allow
261lock classes to be reused. However, if you are tempted to make this
262argument, first review the code and think through the changes that would
263be required, keeping in mind that the lock classes to be removed are
264likely to be linked into the lock-dependency graph. This turns out to
265be harder to do than to say.
266
267Of course, if you do run out of lock classes, the next thing to do is
268to find the offending lock classes. First, the following command gives
269you the number of lock classes currently in use along with the maximum:
270
271 grep "lock-classes" /proc/lockdep_stats
272
273This command produces the following output on a modest system:
274
275 lock-classes: 748 [max: 8191]
276
277If the number allocated (748 above) increases continually over time,
278then there is likely a leak. The following command can be used to
279identify the leaking lock classes:
280
281 grep "BD" /proc/lockdep
282
283Run the command and save the output, then compare against the output from
284a later run of this command to identify the leakers. This same output
285can also help you find situations where runtime lock initialization has
286been omitted.
diff --git a/Documentation/md.txt b/Documentation/md.txt
index fc94770f44ab..993fba37b7d1 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -357,14 +357,14 @@ Each directory contains:
357 written to, that device. 357 written to, that device.
358 358
359 state 359 state
360 A file recording the current state of the device in the array 360 A file recording the current state of the device in the array
361 which can be a comma separated list of 361 which can be a comma separated list of
362 faulty - device has been kicked from active use due to 362 faulty - device has been kicked from active use due to
363 a detected fault or it has unacknowledged bad 363 a detected fault, or it has unacknowledged bad
364 blocks 364 blocks
365 in_sync - device is a fully in-sync member of the array 365 in_sync - device is a fully in-sync member of the array
366 writemostly - device will only be subject to read 366 writemostly - device will only be subject to read
367 requests if there are no other options. 367 requests if there are no other options.
368 This applies only to raid1 arrays. 368 This applies only to raid1 arrays.
369 blocked - device has failed, and the failure hasn't been 369 blocked - device has failed, and the failure hasn't been
370 acknowledged yet by the metadata handler. 370 acknowledged yet by the metadata handler.
@@ -374,6 +374,13 @@ Each directory contains:
374 This includes spares that are in the process 374 This includes spares that are in the process
375 of being recovered to 375 of being recovered to
376 write_error - device has ever seen a write error. 376 write_error - device has ever seen a write error.
377 want_replacement - device is (mostly) working but probably
378 should be replaced, either due to errors or
379 due to user request.
380 replacement - device is a replacement for another active
381 device with same raid_disk.
382
383
377 This list may grow in future. 384 This list may grow in future.
378 This can be written to. 385 This can be written to.
379 Writing "faulty" simulates a failure on the device. 386 Writing "faulty" simulates a failure on the device.
@@ -386,6 +393,13 @@ Each directory contains:
386 Writing "in_sync" sets the in_sync flag. 393 Writing "in_sync" sets the in_sync flag.
387 Writing "write_error" sets writeerrorseen flag. 394 Writing "write_error" sets writeerrorseen flag.
388 Writing "-write_error" clears writeerrorseen flag. 395 Writing "-write_error" clears writeerrorseen flag.
396 Writing "want_replacement" is allowed at any time except to a
397 replacement device or a spare. It sets the flag.
398 Writing "-want_replacement" is allowed at any time. It clears
399 the flag.
400 Writing "replacement" or "-replacement" is only allowed before
401 starting the array. It sets or clears the flag.
402
389 403
390 This file responds to select/poll. Any change to 'faulty' 404 This file responds to select/poll. Any change to 'faulty'
391 or 'blocked' causes an event. 405 or 'blocked' causes an event.
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index bbce1215434a..9ad9ddeb384c 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -144,6 +144,8 @@ nfc.txt
144 - The Linux Near Field Communication (NFC) subsystem. 144 - The Linux Near Field Communication (NFC) subsystem.
145olympic.txt 145olympic.txt
146 - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. 146 - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
147openvswitch.txt
148 - Open vSwitch developer documentation.
147operstates.txt 149operstates.txt
148 - Overview of network interface operational states. 150 - Overview of network interface operational states.
149packet_mmap.txt 151packet_mmap.txt
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
index c86d03f18a5b..221ad0cdf11f 100644
--- a/Documentation/networking/batman-adv.txt
+++ b/Documentation/networking/batman-adv.txt
@@ -200,15 +200,16 @@ abled during run time. Following log_levels are defined:
200 200
2010 - All debug output disabled 2010 - All debug output disabled
2021 - Enable messages related to routing / flooding / broadcasting 2021 - Enable messages related to routing / flooding / broadcasting
2032 - Enable route or tt entry added / changed / deleted 2032 - Enable messages related to route added / changed / deleted
2043 - Enable all messages 2044 - Enable messages related to translation table operations
2057 - Enable all messages
205 206
206The debug output can be changed at runtime using the file 207The debug output can be changed at runtime using the file
207/sys/class/net/bat0/mesh/log_level. e.g. 208/sys/class/net/bat0/mesh/log_level. e.g.
208 209
209# echo 2 > /sys/class/net/bat0/mesh/log_level 210# echo 2 > /sys/class/net/bat0/mesh/log_level
210 211
211will enable debug messages for when routes or TTs change. 212will enable debug messages for when routes change.
212 213
213 214
214BATCTL 215BATCTL
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 91df678fb7f8..080ad26690ae 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -196,6 +196,23 @@ or, for backwards compatibility, the option value. E.g.,
196 196
197 The parameters are as follows: 197 The parameters are as follows:
198 198
199active_slave
200
201 Specifies the new active slave for modes that support it
202 (active-backup, balance-alb and balance-tlb). Possible values
203 are the name of any currently enslaved interface, or an empty
204 string. If a name is given, the slave and its link must be up in order
205 to be selected as the new active slave. If an empty string is
206 specified, the current active slave is cleared, and a new active
207 slave is selected automatically.
208
209 Note that this is only available through the sysfs interface. No module
210 parameter by this name exists.
211
212 The normal value of this option is the name of the currently
213 active slave, or the empty string if there is no active slave or
214 the current mode does not use an active slave.
215
199ad_select 216ad_select
200 217
201 Specifies the 802.3ad aggregation selection logic to use. The 218 Specifies the 802.3ad aggregation selection logic to use. The
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
index f41ea2405220..1dc1c24a7547 100644
--- a/Documentation/networking/ieee802154.txt
+++ b/Documentation/networking/ieee802154.txt
@@ -78,3 +78,30 @@ in software. This is currently WIP.
78 78
79See header include/net/mac802154.h and several drivers in drivers/ieee802154/. 79See header include/net/mac802154.h and several drivers in drivers/ieee802154/.
80 80
816LoWPAN Linux implementation
82============================
83
84The IEEE 802.15.4 standard specifies an MTU of 128 bytes, yielding about 80
85octets of actual MAC payload once security is turned on, on a wireless link
86with a link throughput of 250 kbps or less. The 6LoWPAN adaptation format
87[RFC4944] was specified to carry IPv6 datagrams over such constrained links,
88taking into account limited bandwidth, memory, or energy resources that are
89expected in applications such as wireless Sensor Networks. [RFC4944] defines
90a Mesh Addressing header to support sub-IP forwarding, a Fragmentation header
91to support the IPv6 minimum MTU requirement [RFC2460], and stateless header
92compression for IPv6 datagrams (LOWPAN_HC1 and LOWPAN_HC2) to reduce the
93relatively large IPv6 and UDP headers down to (in the best case) several bytes.
94
95In September 2011 the standard update was published - [RFC6282].
96It deprecates HC1 and HC2 compression and defines IPHC encoding format which is
97used in this Linux implementation.
98
99All the code related to 6lowpan can be found in the files net/ieee802154/6lowpan.*
100
101To set up a 6lowpan interface you need (busybox release > 1.17.0):
1021. Add IEEE802.15.4 interface and initialize PANid;
1032. Add 6lowpan interface by command like:
104 # ip link add link wpan0 name lowpan0 type lowpan
1053. Set MAC (if needed):
106 # ip link set lowpan0 address de:ad:be:ef:ca:fe:ba:be
1074. Bring up 'lowpan0' interface
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c
index 65968fbf1e49..ac5debb2f16c 100644
--- a/Documentation/networking/ifenslave.c
+++ b/Documentation/networking/ifenslave.c
@@ -539,12 +539,14 @@ static int if_getconfig(char *ifname)
539 metric = 0; 539 metric = 0;
540 } else 540 } else
541 metric = ifr.ifr_metric; 541 metric = ifr.ifr_metric;
542 printf("The result of SIOCGIFMETRIC is %d\n", metric);
542 543
543 strcpy(ifr.ifr_name, ifname); 544 strcpy(ifr.ifr_name, ifname);
544 if (ioctl(skfd, SIOCGIFMTU, &ifr) < 0) 545 if (ioctl(skfd, SIOCGIFMTU, &ifr) < 0)
545 mtu = 0; 546 mtu = 0;
546 else 547 else
547 mtu = ifr.ifr_mtu; 548 mtu = ifr.ifr_mtu;
549 printf("The result of SIOCGIFMTU is %d\n", mtu);
548 550
549 strcpy(ifr.ifr_name, ifname); 551 strcpy(ifr.ifr_name, ifname);
550 if (ioctl(skfd, SIOCGIFDSTADDR, &ifr) < 0) { 552 if (ioctl(skfd, SIOCGIFDSTADDR, &ifr) < 0) {
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 589f2da5d545..ad3e80e17b4f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -31,6 +31,16 @@ neigh/default/gc_thresh3 - INTEGER
31 when using large numbers of interfaces and when communicating 31 when using large numbers of interfaces and when communicating
32 with large numbers of directly-connected peers. 32 with large numbers of directly-connected peers.
33 33
34neigh/default/unres_qlen_bytes - INTEGER
35 The maximum number of bytes which may be used by packets
36 queued for each unresolved address by other network layers.
37 (added in linux 3.3)
38
39neigh/default/unres_qlen - INTEGER
40 The maximum number of packets which may be queued for each
41 unresolved address by other network layers.
42 (deprecated in linux 3.3) : use unres_qlen_bytes instead.
43
34mtu_expires - INTEGER 44mtu_expires - INTEGER
35 Time, in seconds, that cached PMTU information is kept. 45 Time, in seconds, that cached PMTU information is kept.
36 46
@@ -165,6 +175,9 @@ tcp_congestion_control - STRING
165 connections. The algorithm "reno" is always available, but 175 connections. The algorithm "reno" is always available, but
166 additional choices may be available based on kernel configuration. 176 additional choices may be available based on kernel configuration.
167 Default is set as part of kernel configuration. 177 Default is set as part of kernel configuration.
178 For passive connections, the listener congestion control choice
179 is inherited.
180 [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ]
168 181
169tcp_cookie_size - INTEGER 182tcp_cookie_size - INTEGER
170 Default size of TCP Cookie Transactions (TCPCT) option, that may be 183 Default size of TCP Cookie Transactions (TCPCT) option, that may be
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
new file mode 100644
index 000000000000..b8a048b8df3a
--- /dev/null
+++ b/Documentation/networking/openvswitch.txt
@@ -0,0 +1,195 @@
1Open vSwitch datapath developer documentation
2=============================================
3
4The Open vSwitch kernel module allows flexible userspace control over
5flow-level packet processing on selected network devices. It can be
6used to implement a plain Ethernet switch, network device bonding,
7VLAN processing, network access control, flow-based network control,
8and so on.
9
10The kernel module implements multiple "datapaths" (analogous to
11bridges), each of which can have multiple "vports" (analogous to ports
12within a bridge). Each datapath also has associated with it a "flow
13table" that userspace populates with "flows" that map from keys based
14on packet headers and metadata to sets of actions. The most common
15action forwards the packet to another vport; other actions are also
16implemented.
17
18When a packet arrives on a vport, the kernel module processes it by
19extracting its flow key and looking it up in the flow table. If there
20is a matching flow, it executes the associated actions. If there is
21no match, it queues the packet to userspace for processing (as part of
22its processing, userspace will likely set up a flow to handle further
23packets of the same type entirely in-kernel).
24
25
26Flow key compatibility
27----------------------
28
29Network protocols evolve over time. New protocols become important
30and existing protocols lose their prominence. For the Open vSwitch
31kernel module to remain relevant, it must be possible for newer
32versions to parse additional protocols as part of the flow key. It
33might even be desirable, someday, to drop support for parsing
34protocols that have become obsolete. Therefore, the Netlink interface
35to Open vSwitch is designed to allow carefully written userspace
36applications to work with any version of the flow key, past or future.
37
38To support this forward and backward compatibility, whenever the
39kernel module passes a packet to userspace, it also passes along the
40flow key that it parsed from the packet. Userspace then extracts its
41own notion of a flow key from the packet and compares it against the
42kernel-provided version:
43
44 - If userspace's notion of the flow key for the packet matches the
45 kernel's, then nothing special is necessary.
46
47 - If the kernel's flow key includes more fields than the userspace
48 version of the flow key, for example if the kernel decoded IPv6
49 headers but userspace stopped at the Ethernet type (because it
50 does not understand IPv6), then again nothing special is
51 necessary. Userspace can still set up a flow in the usual way,
52 as long as it uses the kernel-provided flow key to do it.
53
54 - If the userspace flow key includes more fields than the
55 kernel's, for example if userspace decoded an IPv6 header but
56 the kernel stopped at the Ethernet type, then userspace can
57 forward the packet manually, without setting up a flow in the
58 kernel. This case is bad for performance because every packet
59 that the kernel considers part of the flow must go to userspace,
60 but the forwarding behavior is correct. (If userspace can
61 determine that the values of the extra fields would not affect
62 forwarding behavior, then it could set up a flow anyway.)
63
64How flow keys evolve over time is important to making this work, so
65the following sections go into detail.
66
67
68Flow key format
69---------------
70
71A flow key is passed over a Netlink socket as a sequence of Netlink
72attributes. Some attributes represent packet metadata, defined as any
73information about a packet that cannot be extracted from the packet
74itself, e.g. the vport on which the packet was received. Most
75attributes, however, are extracted from headers within the packet,
76e.g. source and destination addresses from Ethernet, IP, or TCP
77headers.
78
79The <linux/openvswitch.h> header file defines the exact format of the
80flow key attributes. For informal explanatory purposes here, we write
81them as comma-separated strings, with parentheses indicating arguments
82and nesting. For example, the following could represent a flow key
83corresponding to a TCP packet that arrived on vport 1:
84
85 in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
86 eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=6, tos=0,
87 frag=no), tcp(src=49163, dst=80)
88
89Often we ellipsize arguments not important to the discussion, e.g.:
90
91 in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
92
93
94Basic rule for evolving flow keys
95---------------------------------
96
97Some care is needed to really maintain forward and backward
98compatibility for applications that follow the rules listed under
99"Flow key compatibility" above.
100
101The basic rule is obvious:
102
103 ------------------------------------------------------------------
104 New network protocol support must only supplement existing flow
105 key attributes. It must not change the meaning of already defined
106 flow key attributes.
107 ------------------------------------------------------------------
108
109This rule does have less-obvious consequences so it is worth working
110through a few examples. Suppose, for example, that the kernel module
111did not already implement VLAN parsing. Instead, it just interpreted
112the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
113packet. The flow key for any packet with an 802.1Q header would look
114essentially like this, ignoring metadata:
115
116 eth(...), eth_type(0x8100)
117
118Naively, to add VLAN support, it makes sense to add a new "vlan" flow
119key attribute to contain the VLAN tag, then continue to decode the
120encapsulated headers beyond the VLAN tag using the existing field
121definitions. With this change, a TCP packet in VLAN 10 would have a
122flow key much like this:
123
124 eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
125
126But this change would negatively affect a userspace application that
127has not been updated to understand the new "vlan" flow key attribute.
128The application could, following the flow compatibility rules above,
129ignore the "vlan" attribute that it does not understand and therefore
130assume that the flow contained IP packets. This is a bad assumption
131(the flow only contains IP packets if one parses and skips over the
132802.1Q header) and it could cause the application's behavior to change
133across kernel versions even though it follows the compatibility rules.
134
135The solution is to use a set of nested attributes. This is, for
136example, why 802.1Q support uses nested attributes. A TCP packet in
137VLAN 10 is actually expressed as:
138
139 eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
140 ip(proto=6, ...), tcp(...))
141
142Notice how the "eth_type", "ip", and "tcp" flow key attributes are
143nested inside the "encap" attribute. Thus, an application that does
144not understand the "vlan" key will not see any of those attributes
145and therefore will not misinterpret them. (Also, the outer eth_type
146is still 0x8100, not changed to 0x0800.)
147
148Handling malformed packets
149--------------------------
150
151Don't drop packets in the kernel for malformed protocol headers, bad
152checksums, etc. This would prevent userspace from implementing a
153simple Ethernet switch that forwards every packet.
154
155Instead, in such a case, include an attribute with "empty" content.
156It doesn't matter if the empty content could be valid protocol values,
157as long as those values are rarely seen in practice, because userspace
158can always forward all packets with those values to userspace and
159handle them individually.
160
161For example, consider a packet that contains an IP header that
162indicates protocol 6 for TCP, but which is truncated just after the IP
163header, so that the TCP header is missing. The flow key for this
164packet would include a tcp attribute with all-zero src and dst, like
165this:
166
167 eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
168
169As another example, consider a packet with an Ethernet type of 0x8100,
170indicating that a VLAN TCI should follow, but which is truncated just
171after the Ethernet type. The flow key for this packet would include
172an all-zero-bits vlan and an empty encap attribute, like this:
173
174 eth(...), eth_type(0x8100), vlan(0), encap()
175
176Unlike a TCP packet with source and destination ports 0, an
177all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
178VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
179attribute expressly to allow this situation to be distinguished.
180Thus, the flow key in this second example unambiguously indicates a
181missing or malformed VLAN TCI.
182
183Other rules
184-----------
185
186The other rules for flow keys are much less subtle:
187
188 - Duplicate attributes are not allowed at a given nesting level.
189
190 - Ordering of attributes is not significant.
191
192 - When the kernel sends a given flow key to userspace, it always
193 composes it the same way. This allows userspace to hash and
194 compare entire flow keys that it may not be able to fully
195 interpret.
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 4acea6603720..1c08a4b0981f 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -155,7 +155,7 @@ As capture, each frame contains two parts:
155 155
156 /* fill sockaddr_ll struct to prepare binding */ 156 /* fill sockaddr_ll struct to prepare binding */
157 my_addr.sll_family = AF_PACKET; 157 my_addr.sll_family = AF_PACKET;
158 my_addr.sll_protocol = ETH_P_ALL; 158 my_addr.sll_protocol = htons(ETH_P_ALL);
159 my_addr.sll_ifindex = s_ifr.ifr_ifindex; 159 my_addr.sll_ifindex = s_ifr.ifr_ifindex;
160 160
161 /* bind socket to eth0 */ 161 /* bind socket to eth0 */
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index a177de21d28e..579994afbe06 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -208,7 +208,7 @@ The counter in rps_dev_flow_table values records the length of the current
208CPU's backlog when a packet in this flow was last enqueued. Each backlog 208CPU's backlog when a packet in this flow was last enqueued. Each backlog
209queue has a head counter that is incremented on dequeue. A tail counter 209queue has a head counter that is incremented on dequeue. A tail counter
210is computed as head counter + queue length. In other words, the counter 210is computed as head counter + queue length. In other words, the counter
211in rps_dev_flow_table[i] records the last element in flow i that has 211in rps_dev_flow[i] records the last element in flow i that has
212been enqueued onto the currently designated CPU for flow i (of course, 212been enqueued onto the currently designated CPU for flow i (of course,
213entry i is actually selected by hash and multiple flows may hash to the 213entry i is actually selected by hash and multiple flows may hash to the
214same entry i). 214same entry i).
@@ -224,7 +224,7 @@ following is true:
224 224
225- The current CPU's queue head counter >= the recorded tail counter 225- The current CPU's queue head counter >= the recorded tail counter
226 value in rps_dev_flow[i] 226 value in rps_dev_flow[i]
227- The current CPU is unset (equal to NR_CPUS) 227- The current CPU is unset (equal to RPS_NO_CPU)
228- The current CPU is offline 228- The current CPU is offline
229 229
230After this check, the packet is sent to the (possibly updated) current 230After this check, the packet is sent to the (possibly updated) current
@@ -235,7 +235,7 @@ CPU.
235 235
236==== RFS Configuration 236==== RFS Configuration
237 237
238RFS is only available if the kconfig symbol CONFIG_RFS is enabled (on 238RFS is only available if the kconfig symbol CONFIG_RPS is enabled (on
239by default for SMP). The functionality remains disabled until explicitly 239by default for SMP). The functionality remains disabled until explicitly
240configured. The number of entries in the global flow table is set through: 240configured. The number of entries in the global flow table is set through:
241 241
@@ -258,7 +258,7 @@ For a single queue device, the rps_flow_cnt value for the single queue
258would normally be configured to the same value as rps_sock_flow_entries. 258would normally be configured to the same value as rps_sock_flow_entries.
259For a multi-queue device, the rps_flow_cnt for each queue might be 259For a multi-queue device, the rps_flow_cnt for each queue might be
260configured as rps_sock_flow_entries / N, where N is the number of 260configured as rps_sock_flow_entries / N, where N is the number of
261queues. So for instance, if rps_flow_entries is set to 32768 and there 261queues. So for instance, if rps_sock_flow_entries is set to 32768 and there
262are 16 configured receive queues, rps_flow_cnt for each queue might be 262are 16 configured receive queues, rps_flow_cnt for each queue might be
263configured as 2048. 263configured as 2048.
264 264
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 8d67980fabe8..d0aeeadd264b 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -4,14 +4,16 @@ Copyright (C) 2007-2010 STMicroelectronics Ltd
4Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> 4Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
5 5
6This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers 6This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
7(Synopsys IP blocks); it has been fully tested on STLinux platforms. 7(Synopsys IP blocks).
8 8
9Currently this network device driver is for all STM embedded MAC/GMAC 9Currently this network device driver is for all STM embedded MAC/GMAC
10(i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr. 10(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XILINX XC2V3000
11FF1152AMT0221 D1215994A VIRTEX FPGA board.
11 12
12DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 13DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether MAC 10/100
13Universal version 4.0 have been used for developing the first code 14Universal version 4.0 have been used for developing this driver.
14implementation. 15
16This driver supports both the platform bus and PCI.
15 17
16Please, for more information also visit: www.stlinux.com 18Please, for more information also visit: www.stlinux.com
17 19
@@ -277,5 +279,5 @@ In fact, these can generate an huge amount of debug messages.
277 279
2786) TODO: 2806) TODO:
279 o XGMAC is not supported. 281 o XGMAC is not supported.
280 o Review the timer optimisation code to use an embedded device that will be 282 o Add the EEE - Energy Efficient Ethernet
281 available in new chip generations. 283 o Add the PTP - precision time protocol
diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
new file mode 100644
index 000000000000..5a013686b9ea
--- /dev/null
+++ b/Documentation/networking/team.txt
@@ -0,0 +1,2 @@
1Team devices are driven from userspace via the libteam library, which is available here:
2 https://github.com/jpirko/libteam
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index b04cb7d45a16..6727b92bc2fb 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -7,12 +7,9 @@ This subsystem deals with:
7 7
8- Multiplexing of pins, pads, fingers (etc) see below for details 8- Multiplexing of pins, pads, fingers (etc) see below for details
9 9
10The intention is to also deal with: 10- Configuration of pins, pads, fingers (etc), such as software-controlled
11 11 biasing and driving mode specific pins, such as pull-up/down, open drain,
12- Software-controlled biasing and driving mode specific pins, such as 12 load capacitance etc.
13 pull-up/down, open drain etc, load capacitance configuration when controlled
14 by software, etc.
15
16 13
17Top-level interface 14Top-level interface
18=================== 15===================
@@ -32,7 +29,7 @@ Definition of PIN:
32 be sparse - i.e. there may be gaps in the space with numbers where no 29 be sparse - i.e. there may be gaps in the space with numbers where no
33 pin exists. 30 pin exists.
34 31
35When a PIN CONTROLLER is instatiated, it will register a descriptor to the 32When a PIN CONTROLLER is instantiated, it will register a descriptor to the
36pin control framework, and this descriptor contains an array of pin descriptors 33pin control framework, and this descriptor contains an array of pin descriptors
37describing the pins handled by this specific pin controller. 34describing the pins handled by this specific pin controller.
38 35
@@ -61,14 +58,14 @@ this in our driver:
61 58
62#include <linux/pinctrl/pinctrl.h> 59#include <linux/pinctrl/pinctrl.h>
63 60
64const struct pinctrl_pin_desc __refdata foo_pins[] = { 61const struct pinctrl_pin_desc foo_pins[] = {
65 PINCTRL_PIN(0, "A1"), 62 PINCTRL_PIN(0, "A8"),
66 PINCTRL_PIN(1, "A2"), 63 PINCTRL_PIN(1, "B8"),
67 PINCTRL_PIN(2, "A3"), 64 PINCTRL_PIN(2, "C8"),
68 ... 65 ...
69 PINCTRL_PIN(61, "H6"), 66 PINCTRL_PIN(61, "F1"),
70 PINCTRL_PIN(62, "H7"), 67 PINCTRL_PIN(62, "G1"),
71 PINCTRL_PIN(63, "H8"), 68 PINCTRL_PIN(63, "H1"),
72}; 69};
73 70
74static struct pinctrl_desc foo_desc = { 71static struct pinctrl_desc foo_desc = {
@@ -88,11 +85,16 @@ int __init foo_probe(void)
88 pr_err("could not register foo pin driver\n"); 85 pr_err("could not register foo pin driver\n");
89} 86}
90 87
88To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and
89selected drivers, you need to select them from your machine's Kconfig entry,
90since these are so tightly integrated with the machines they are used on.
91See arch/arm/mach-u300/Kconfig for an example.
92
91Pins usually have fancier names than this. You can find these in the datasheet 93Pins usually have fancier names than this. You can find these in the datasheet
92for your chip. Notice that the core pinctrl.h file provides a fancy macro 94for your chip. Notice that the core pinctrl.h file provides a fancy macro
93called PINCTRL_PIN() to create the struct entries. As you can see I enumerated 95called PINCTRL_PIN() to create the struct entries. As you can see I enumerated
94the pins from 0 in the upper left corner to 63 in the lower right corner, 96the pins from 0 in the upper left corner to 63 in the lower right corner.
95this enumeration was arbitrarily chosen, in practice you need to think 97This enumeration was arbitrarily chosen, in practice you need to think
96through your numbering system so that it matches the layout of registers 98through your numbering system so that it matches the layout of registers
97and such things in your driver, or the code may become complicated. You must 99and such things in your driver, or the code may become complicated. You must
98also consider matching of offsets to the GPIO ranges that may be handled by 100also consider matching of offsets to the GPIO ranges that may be handled by
@@ -133,8 +135,8 @@ struct foo_group {
133 const unsigned num_pins; 135 const unsigned num_pins;
134}; 136};
135 137
136static unsigned int spi0_pins[] = { 0, 8, 16, 24 }; 138static const unsigned int spi0_pins[] = { 0, 8, 16, 24 };
137static unsigned int i2c0_pins[] = { 24, 25 }; 139static const unsigned int i2c0_pins[] = { 24, 25 };
138 140
139static const struct foo_group foo_groups[] = { 141static const struct foo_group foo_groups[] = {
140 { 142 {
@@ -193,6 +195,88 @@ structure, for example specific register ranges associated with each group
193and so on. 195and so on.
194 196
195 197
198Pin configuration
199=================
200
201Pins can sometimes be software-configured in various ways, mostly related
202to their electronic properties when used as inputs or outputs. For example you
203may be able to make an output pin high impedance, or "tristate" meaning it is
204effectively disconnected. You may be able to connect an input pin to VDD or GND
205using a certain resistor value - pull up and pull down - so that the pin has a
206stable value when nothing is driving the rail it is connected to, or when it's
207unconnected.
208
209For example, a platform may do this:
210
211ret = pin_config_set("foo-dev", "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
212
213To pull up a pin to VDD. The pin configuration driver implements callbacks for
214changing pin configuration in the pin controller ops like this:
215
216#include <linux/pinctrl/pinctrl.h>
217#include <linux/pinctrl/pinconf.h>
218#include "platform_x_pindefs.h"
219
220static int foo_pin_config_get(struct pinctrl_dev *pctldev,
221 unsigned offset,
222 unsigned long *config)
223{
224 struct my_conftype conf;
225
226 ... Find setting for pin @ offset ...
227
228 *config = (unsigned long) conf;
229}
230
231static int foo_pin_config_set(struct pinctrl_dev *pctldev,
232 unsigned offset,
233 unsigned long config)
234{
235 struct my_conftype *conf = (struct my_conftype *) config;
236
237 switch (conf) {
238 case PLATFORM_X_PULL_UP:
239 ...
240 }
241 }
242}
243
244static int foo_pin_config_group_get (struct pinctrl_dev *pctldev,
245 unsigned selector,
246 unsigned long *config)
247{
248 ...
249}
250
251static int foo_pin_config_group_set (struct pinctrl_dev *pctldev,
252 unsigned selector,
253 unsigned long config)
254{
255 ...
256}
257
258static struct pinconf_ops foo_pconf_ops = {
259 .pin_config_get = foo_pin_config_get,
260 .pin_config_set = foo_pin_config_set,
261 .pin_config_group_get = foo_pin_config_group_get,
262 .pin_config_group_set = foo_pin_config_group_set,
263};
264
265/* Pin config operations are handled by some pin controller */
266static struct pinctrl_desc foo_desc = {
267 ...
268 .confops = &foo_pconf_ops,
269};
270
271Since some controllers have special logic for handling entire groups of pins
272they can exploit the special whole-group pin control function. The
273pin_config_group_set() callback is allowed to return the error code -EAGAIN,
274for groups it does not want to handle, or if it just wants to do some
275group-level handling and then fall through to iterate over all pins, in which
276case each individual pin will be treated by separate pin_config_set() calls as
277well.
278
279
196Interaction with the GPIO subsystem 280Interaction with the GPIO subsystem
197=================================== 281===================================
198 282
@@ -214,19 +298,20 @@ static struct pinctrl_gpio_range gpio_range_a = {
214 .name = "chip a", 298 .name = "chip a",
215 .id = 0, 299 .id = 0,
216 .base = 32, 300 .base = 32,
301 .pin_base = 32,
217 .npins = 16, 302 .npins = 16,
218 .gc = &chip_a; 303 .gc = &chip_a;
219}; 304};
220 305
221static struct pinctrl_gpio_range gpio_range_a = { 306static struct pinctrl_gpio_range gpio_range_b = {
222 .name = "chip b", 307 .name = "chip b",
223 .id = 0, 308 .id = 0,
224 .base = 48, 309 .base = 48,
310 .pin_base = 64,
225 .npins = 8, 311 .npins = 8,
226 .gc = &chip_b; 312 .gc = &chip_b;
227}; 313};
228 314
229
230{ 315{
231 struct pinctrl_dev *pctl; 316 struct pinctrl_dev *pctl;
232 ... 317 ...
@@ -235,42 +320,39 @@ static struct pinctrl_gpio_range gpio_range_a = {
235} 320}
236 321
237So this complex system has one pin controller handling two different 322So this complex system has one pin controller handling two different
238GPIO chips. Chip a has 16 pins and chip b has 8 pins. They are mapped in 323GPIO chips. "chip a" has 16 pins and "chip b" has 8 pins. The "chip a" and
239the global GPIO pin space at: 324"chip b" have different .pin_base, which means a start pin number of the
325GPIO range.
326
327The GPIO range of "chip a" starts from the GPIO base of 32 and actual
328pin range also starts from 32. However "chip b" has different starting
329offset for the GPIO range and pin range. The GPIO range of "chip b" starts
330from GPIO number 48, while the pin range of "chip b" starts from 64.
331
332We can convert a gpio number to actual pin number using this "pin_base".
333They are mapped in the global GPIO pin space at:
240 334
241chip a: [32 .. 47] 335chip a:
242chip b: [48 .. 55] 336 - GPIO range : [32 .. 47]
337 - pin range : [32 .. 47]
338chip b:
339 - GPIO range : [48 .. 55]
340 - pin range : [64 .. 71]
243 341
244When GPIO-specific functions in the pin control subsystem are called, these 342When GPIO-specific functions in the pin control subsystem are called, these
245ranges will be used to look up the apropriate pin controller by inspecting 343ranges will be used to look up the appropriate pin controller by inspecting
246and matching the pin to the pin ranges across all controllers. When a 344and matching the pin to the pin ranges across all controllers. When a
247pin controller handling the matching range is found, GPIO-specific functions 345pin controller handling the matching range is found, GPIO-specific functions
248will be called on that specific pin controller. 346will be called on that specific pin controller.
249 347
250For all functionalities dealing with pin biasing, pin muxing etc, the pin 348For all functionalities dealing with pin biasing, pin muxing etc, the pin
251controller subsystem will subtract the range's .base offset from the passed 349controller subsystem will subtract the range's .base offset from the passed
252in gpio pin number, and pass that on to the pin control driver, so the driver 350in gpio number, and add the range's .pin_base offset to retrieve a pin number.
253will get an offset into its handled number range. Further it is also passed 351After that, the subsystem passes it on to the pin control driver, so the driver
352will get a pin number into its handled number range. Further it is also passed
254the range ID value, so that the pin controller knows which range it should 353the range ID value, so that the pin controller knows which range it should
255deal with. 354deal with.
256 355
257For example: if a user issues pinctrl_gpio_set_foo(50), the pin control
258subsystem will find that the second range on this pin controller matches,
259subtract the base 48 and call the
260pinctrl_driver_gpio_set_foo(pinctrl, range, 2) where the latter function has
261this signature:
262
263int pinctrl_driver_gpio_set_foo(struct pinctrl_dev *pctldev,
264 struct pinctrl_gpio_range *rangeid,
265 unsigned offset);
266
267Now the driver knows that we want to do some GPIO-specific operation on the
268second GPIO range handled by "chip b", at offset 2 in that specific range.
269
270(If the GPIO subsystem is ever refactored to use a local per-GPIO controller
271pin space, this mapping will need to be augmented accordingly.)
272
273
274PINMUX interfaces 356PINMUX interfaces
275================= 357=================
276 358
@@ -438,7 +520,7 @@ you. Define enumerators only for the pins you can control if that makes sense.
438 520
439Assumptions: 521Assumptions:
440 522
441We assume that the number possible function maps to pin groups is limited by 523We assume that the number of possible function maps to pin groups is limited by
442the hardware. I.e. we assume that there is no system where any function can be 524the hardware. I.e. we assume that there is no system where any function can be
443mapped to any pin, like in a phone exchange. So the available pins groups for 525mapped to any pin, like in a phone exchange. So the available pins groups for
444a certain function will be limited to a few choices (say up to eight or so), 526a certain function will be limited to a few choices (say up to eight or so),
@@ -585,7 +667,7 @@ int foo_list_funcs(struct pinctrl_dev *pctldev, unsigned selector)
585 667
586const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned selector) 668const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned selector)
587{ 669{
588 return myfuncs[selector].name; 670 return foo_functions[selector].name;
589} 671}
590 672
591static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, 673static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector,
@@ -600,16 +682,16 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector,
600int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, 682int foo_enable(struct pinctrl_dev *pctldev, unsigned selector,
601 unsigned group) 683 unsigned group)
602{ 684{
603 u8 regbit = (1 << group); 685 u8 regbit = (1 << selector + group);
604 686
605 writeb((readb(MUX)|regbit), MUX) 687 writeb((readb(MUX)|regbit), MUX)
606 return 0; 688 return 0;
607} 689}
608 690
609int foo_disable(struct pinctrl_dev *pctldev, unsigned selector, 691void foo_disable(struct pinctrl_dev *pctldev, unsigned selector,
610 unsigned group) 692 unsigned group)
611{ 693{
612 u8 regbit = (1 << group); 694 u8 regbit = (1 << selector + group);
613 695
614 writeb((readb(MUX) & ~(regbit)), MUX) 696 writeb((readb(MUX) & ~(regbit)), MUX)
615 return 0; 697 return 0;
@@ -647,6 +729,17 @@ All the above functions are mandatory to implement for a pinmux driver.
647Pinmux interaction with the GPIO subsystem 729Pinmux interaction with the GPIO subsystem
648========================================== 730==========================================
649 731
732The public pinmux API contains two functions named pinmux_request_gpio()
733and pinmux_free_gpio(). These two functions shall *ONLY* be called from
734gpiolib-based drivers as part of their gpio_request() and
735gpio_free() semantics. Likewise the pinmux_gpio_direction_[input|output]
736shall only be called from within respective gpio_direction_[input|output]
737gpiolib implementation.
738
739NOTE that platforms and individual drivers shall *NOT* request GPIO pins to be
740muxed in. Instead, implement a proper gpiolib driver and have that driver
741request proper muxing for its pins.
742
650The function list could become long, especially if you can convert every 743The function list could become long, especially if you can convert every
651individual pin into a GPIO pin independent of any other pins, and then try 744individual pin into a GPIO pin independent of any other pins, and then try
652the approach to define every pin as a function. 745the approach to define every pin as a function.
@@ -654,19 +747,24 @@ the approach to define every pin as a function.
654In this case, the function array would become 64 entries for each GPIO 747In this case, the function array would become 64 entries for each GPIO
655setting and then the device functions. 748setting and then the device functions.
656 749
657For this reason there is an additional function a pinmux driver can implement 750For this reason there are two functions a pinmux driver can implement
658to enable only GPIO on an individual pin: .gpio_request_enable(). The same 751to enable only GPIO on an individual pin: .gpio_request_enable() and
659.free() function as for other functions is assumed to be usable also for 752.gpio_disable_free().
660GPIO pins.
661 753
662This function will pass in the affected GPIO range identified by the pin 754This function will pass in the affected GPIO range identified by the pin
663controller core, so you know which GPIO pins are being affected by the request 755controller core, so you know which GPIO pins are being affected by the request
664operation. 756operation.
665 757
666Alternatively it is fully allowed to use named functions for each GPIO 758If your driver needs to have an indication from the framework of whether the
667pin, the pinmux_request_gpio() will attempt to obtain the function "gpioN" 759GPIO pin shall be used for input or output you can implement the
668where "N" is the global GPIO pin number if no special GPIO-handler is 760.gpio_set_direction() function. As described this shall be called from the
669registered. 761gpiolib driver and the affected GPIO range, pin offset and desired direction
762will be passed along to this function.
763
764Alternatively to using these special functions, it is fully allowed to use
765named functions for each GPIO pin, the pinmux_request_gpio() will attempt to
766obtain the function "gpioN" where "N" is the global GPIO pin number if no
767special GPIO-handler is registered.
670 768
671 769
672Pinmux board/machine configuration 770Pinmux board/machine configuration
@@ -683,19 +781,19 @@ spi on the second function mapping:
683 781
684#include <linux/pinctrl/machine.h> 782#include <linux/pinctrl/machine.h>
685 783
686static struct pinmux_map pmx_mapping[] = { 784static const struct pinmux_map __initdata pmx_mapping[] = {
687 { 785 {
688 .ctrl_dev_name = "pinctrl.0", 786 .ctrl_dev_name = "pinctrl-foo",
689 .function = "spi0", 787 .function = "spi0",
690 .dev_name = "foo-spi.0", 788 .dev_name = "foo-spi.0",
691 }, 789 },
692 { 790 {
693 .ctrl_dev_name = "pinctrl.0", 791 .ctrl_dev_name = "pinctrl-foo",
694 .function = "i2c0", 792 .function = "i2c0",
695 .dev_name = "foo-i2c.0", 793 .dev_name = "foo-i2c.0",
696 }, 794 },
697 { 795 {
698 .ctrl_dev_name = "pinctrl.0", 796 .ctrl_dev_name = "pinctrl-foo",
699 .function = "mmc0", 797 .function = "mmc0",
700 .dev_name = "foo-mmc.0", 798 .dev_name = "foo-mmc.0",
701 }, 799 },
@@ -714,14 +812,14 @@ for example if they are not yet instantiated or cumbersome to obtain.
714 812
715You register this pinmux mapping to the pinmux subsystem by simply: 813You register this pinmux mapping to the pinmux subsystem by simply:
716 814
717 ret = pinmux_register_mappings(&pmx_mapping, ARRAY_SIZE(pmx_mapping)); 815 ret = pinmux_register_mappings(pmx_mapping, ARRAY_SIZE(pmx_mapping));
718 816
719Since the above construct is pretty common there is a helper macro to make 817Since the above construct is pretty common there is a helper macro to make
720it even more compact which assumes you want to use pinctrl.0 and position 818it even more compact which assumes you want to use pinctrl-foo and position
7210 for mapping, for example: 8190 for mapping, for example:
722 820
723static struct pinmux_map pmx_mapping[] = { 821static struct pinmux_map __initdata pmx_mapping[] = {
724 PINMUX_MAP_PRIMARY("I2CMAP", "i2c0", "foo-i2c.0"), 822 PINMUX_MAP("I2CMAP", "pinctrl-foo", "i2c0", "foo-i2c.0"),
725}; 823};
726 824
727 825
@@ -734,14 +832,14 @@ As it is possible to map a function to different groups of pins an optional
734... 832...
735{ 833{
736 .name = "spi0-pos-A", 834 .name = "spi0-pos-A",
737 .ctrl_dev_name = "pinctrl.0", 835 .ctrl_dev_name = "pinctrl-foo",
738 .function = "spi0", 836 .function = "spi0",
739 .group = "spi0_0_grp", 837 .group = "spi0_0_grp",
740 .dev_name = "foo-spi.0", 838 .dev_name = "foo-spi.0",
741}, 839},
742{ 840{
743 .name = "spi0-pos-B", 841 .name = "spi0-pos-B",
744 .ctrl_dev_name = "pinctrl.0", 842 .ctrl_dev_name = "pinctrl-foo",
745 .function = "spi0", 843 .function = "spi0",
746 .group = "spi0_1_grp", 844 .group = "spi0_1_grp",
747 .dev_name = "foo-spi.0", 845 .dev_name = "foo-spi.0",
@@ -760,44 +858,44 @@ case), we define a mapping like this:
760... 858...
761{ 859{
762 .name "2bit" 860 .name "2bit"
763 .ctrl_dev_name = "pinctrl.0", 861 .ctrl_dev_name = "pinctrl-foo",
764 .function = "mmc0", 862 .function = "mmc0",
765 .group = "mmc0_0_grp", 863 .group = "mmc0_1_grp",
766 .dev_name = "foo-mmc.0", 864 .dev_name = "foo-mmc.0",
767}, 865},
768{ 866{
769 .name "4bit" 867 .name "4bit"
770 .ctrl_dev_name = "pinctrl.0", 868 .ctrl_dev_name = "pinctrl-foo",
771 .function = "mmc0", 869 .function = "mmc0",
772 .group = "mmc0_0_grp", 870 .group = "mmc0_1_grp",
773 .dev_name = "foo-mmc.0", 871 .dev_name = "foo-mmc.0",
774}, 872},
775{ 873{
776 .name "4bit" 874 .name "4bit"
777 .ctrl_dev_name = "pinctrl.0", 875 .ctrl_dev_name = "pinctrl-foo",
778 .function = "mmc0", 876 .function = "mmc0",
779 .group = "mmc0_1_grp", 877 .group = "mmc0_2_grp",
780 .dev_name = "foo-mmc.0", 878 .dev_name = "foo-mmc.0",
781}, 879},
782{ 880{
783 .name "8bit" 881 .name "8bit"
784 .ctrl_dev_name = "pinctrl.0", 882 .ctrl_dev_name = "pinctrl-foo",
785 .function = "mmc0", 883 .function = "mmc0",
786 .group = "mmc0_0_grp", 884 .group = "mmc0_1_grp",
787 .dev_name = "foo-mmc.0", 885 .dev_name = "foo-mmc.0",
788}, 886},
789{ 887{
790 .name "8bit" 888 .name "8bit"
791 .ctrl_dev_name = "pinctrl.0", 889 .ctrl_dev_name = "pinctrl-foo",
792 .function = "mmc0", 890 .function = "mmc0",
793 .group = "mmc0_1_grp", 891 .group = "mmc0_2_grp",
794 .dev_name = "foo-mmc.0", 892 .dev_name = "foo-mmc.0",
795}, 893},
796{ 894{
797 .name "8bit" 895 .name "8bit"
798 .ctrl_dev_name = "pinctrl.0", 896 .ctrl_dev_name = "pinctrl-foo",
799 .function = "mmc0", 897 .function = "mmc0",
800 .group = "mmc0_2_grp", 898 .group = "mmc0_3_grp",
801 .dev_name = "foo-mmc.0", 899 .dev_name = "foo-mmc.0",
802}, 900},
803... 901...
@@ -898,7 +996,7 @@ like this:
898 996
899{ 997{
900 .name "POWERMAP" 998 .name "POWERMAP"
901 .ctrl_dev_name = "pinctrl.0", 999 .ctrl_dev_name = "pinctrl-foo",
902 .function = "power_func", 1000 .function = "power_func",
903 .hog_on_boot = true, 1001 .hog_on_boot = true,
904}, 1002},
diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
new file mode 100644
index 000000000000..fdcca991df30
--- /dev/null
+++ b/Documentation/power/charger-manager.txt
@@ -0,0 +1,163 @@
1Charger Manager
2 (C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
3
4Charger Manager provides in-kernel battery charger management that
5requires temperature monitoring during suspend-to-RAM state
6and where each battery may have multiple chargers attached and the userland
7wants to look at the aggregated information of the multiple chargers.
8
9Charger Manager is a platform_driver with power-supply-class entries.
10An instance of Charger Manager (a platform-device created with Charger-Manager)
11represents an independent battery with chargers. If there are multiple
12batteries with their own chargers acting independently in a system,
13the system may need multiple instances of Charger Manager.
14
151. Introduction
16===============
17
18Charger Manager supports the following:
19
20* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
21 A system may have multiple chargers (or power sources) and some of
22 them may be activated at the same time. Each charger may have its
23 own power-supply-class and each power-supply-class can provide
24 different information about the battery status. This framework
25 aggregates charger-related information from multiple sources and
26 shows combined information as a single power-supply-class.
27
28* Support for in suspend-to-RAM polling (with suspend_again callback)
29 While the battery is being charged and the system is in suspend-to-RAM,
30 we may need to monitor the battery health by looking at the ambient or
31 battery temperature. We can accomplish this by waking up the system
32 periodically. However, such a method wakes up devices unnecessary for
33 monitoring the battery health and tasks, and user processes that are
34 supposed to be kept suspended. That, in turn, incurs unnecessary power
35 consumption and slows down the charging process. Or even, such peak power
36 consumption can stop chargers in the middle of charging
37 (external power input < device power consumption), which not
38 only affects the charging time, but the lifespan of the battery.
39
40 Charger Manager provides a function "cm_suspend_again" that can be
41 used as suspend_again callback of platform_suspend_ops. If the platform
42 requires tasks other than cm_suspend_again, it may implement its own
43 suspend_again callback that calls cm_suspend_again in the middle.
44 Normally, the platform will need to resume and suspend some devices
45 that are used by Charger Manager.
46
472. Global Charger-Manager Data related with suspend_again
48========================================================
49In order to setup Charger Manager with suspend-again feature
50(in-suspend monitoring), the user should provide charger_global_desc
51with setup_charger_manager(struct charger_global_desc *).
52This charger_global_desc data for in-suspend monitoring is global
53as the name suggests. Thus, the user needs to provide only once even
54if there are multiple batteries. If there are multiple batteries, the
55multiple instances of Charger Manager share the same charger_global_desc
56and it will manage in-suspend monitoring for all instances of Charger Manager.
57
58The user needs to provide both of the following entries properly in order to activate
59in-suspend monitoring:
60
61struct charger_global_desc {
62
63char *rtc_name;
64 : The name of rtc (e.g., "rtc0") used to wakeup the system from
65 suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
66 should be able to wake up the system from suspend. Charger Manager
67 saves and restores the alarm value and uses the previously-defined
68 alarm if it is going to go off earlier than Charger Manager so that
69 Charger Manager does not interfere with previously-defined alarms.
70
71bool (*rtc_only_wakeup)(void);
72 : This callback should let CM know whether
73 the wakeup-from-suspend is caused only by the alarm of "rtc" in the
74 same struct. If there is any other wakeup source triggered the
75 wakeup, it should return false. If the "rtc" is the only wakeup
76 reason, it should return true.
77};
78
793. How to setup suspend_again
80=============================
81Charger Manager provides a function "extern bool cm_suspend_again(void)".
82When cm_suspend_again is called, it monitors every battery. The suspend_again
83callback of the system's platform_suspend_ops can call cm_suspend_again
84function to know whether Charger Manager wants to suspend again or not.
85If there are no other devices or tasks that want to use suspend_again
86feature, the platform_suspend_ops may directly refer to cm_suspend_again
87for its suspend_again callback.
88
89The cm_suspend_again() returns true (meaning "I want to suspend again")
90if the system was woken up by Charger Manager and the polling
91(in-suspend monitoring) results in "normal".
92
934. Charger-Manager Data (struct charger_desc)
94=============================================
95For each battery charged independently from other batteries (if a series of
96batteries are charged by a single charger, they are counted as one independent
97battery), an instance of Charger Manager is attached to it.
98
99struct charger_desc {
100
101char *psy_name;
102 : The power-supply-class name of the battery. Default is
103 "battery" if psy_name is NULL. Users can access the psy entries
104 at "/sys/class/power_supply/[psy_name]/".
105
106enum polling_modes polling_mode;
107 : CM_POLL_DISABLE: do not poll this battery.
108 CM_POLL_ALWAYS: always poll this battery.
109 CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if
110 an external power source is attached.
111 CM_POLL_CHARGING_ONLY: poll this battery if and only if the
112 battery is being charged.
113
114unsigned int fullbatt_uV;
115 : If specified with a non-zero value, Charger Manager assumes
116 that the battery is full (capacity = 100) if the battery is not being
117 charged and the battery voltage is equal to or greater than
118 fullbatt_uV.
119
120unsigned int polling_interval_ms;
121 : Required polling interval in ms. Charger Manager will poll
122 this battery every polling_interval_ms or more frequently.
123
124enum data_source battery_present;
125 CM_FUEL_GAUGE: get battery presence information from fuel gauge.
126 CM_CHARGER_STAT: get battery presence from chargers.
127
128char **psy_charger_stat;
129 : An array ending with NULL that has power-supply-class names of
130 chargers. Each power-supply-class should provide "PRESENT" (if
131 battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
132 external power source is attached or not), and "STATUS" (shows whether
133 the battery is {"FULL" or not FULL} or {"FULL", "Charging",
134 "Discharging", "NotCharging"}).
135
136int num_charger_regulators;
137struct regulator_bulk_data *charger_regulators;
138 : Regulators representing the chargers in the form for
139 regulator framework's bulk functions.
140
141char *psy_fuel_gauge;
142 : Power-supply-class name of the fuel gauge.
143
144int (*temperature_out_of_range)(int *mC);
145bool measure_battery_temp;
146 : This callback returns 0 if the temperature is safe for charging,
147 a positive number if it is too hot to charge, and a negative number
148 if it is too cold to charge. With the variable mC, the callback returns
149 the temperature in 1/1000 of centigrade.
150 The source of temperature can be battery or ambient one according to
151 the value of measure_battery_temp.
152};
153
1545. Other Considerations
155=======================
156
157At the charger/battery-related events such as battery-pulled-out,
158charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
159and others critical to chargers, the system should be configured to wake up.
160At least the following should wake up the system from a suspend:
161a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
162
163It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 3139fb505dce..20af7def23c8 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -126,7 +126,9 @@ The core methods to suspend and resume devices reside in struct dev_pm_ops
126pointed to by the ops member of struct dev_pm_domain, or by the pm member of 126pointed to by the ops member of struct dev_pm_domain, or by the pm member of
127struct bus_type, struct device_type and struct class. They are mostly of 127struct bus_type, struct device_type and struct class. They are mostly of
128interest to the people writing infrastructure for platforms and buses, like PCI 128interest to the people writing infrastructure for platforms and buses, like PCI
129or USB, or device type and device class drivers. 129or USB, or device type and device class drivers. They also are relevant to the
130writers of device drivers whose subsystems (PM domains, device types, device
131classes and bus types) don't provide all power management methods.
130 132
131Bus drivers implement these methods as appropriate for the hardware and the 133Bus drivers implement these methods as appropriate for the hardware and the
132drivers using it; PCI works differently from USB, and so on. Not many people 134drivers using it; PCI works differently from USB, and so on. Not many people
@@ -268,32 +270,35 @@ various phases always run after tasks have been frozen and before they are
268unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have 270unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have
269been disabled (except for those marked with the IRQF_NO_SUSPEND flag). 271been disabled (except for those marked with the IRQF_NO_SUSPEND flag).
270 272
271All phases use PM domain, bus, type, or class callbacks (that is, methods 273All phases use PM domain, bus, type, class or driver callbacks (that is, methods
272defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, or dev->class->pm). 274defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or
273These callbacks are regarded by the PM core as mutually exclusive. Moreover, 275dev->driver->pm). These callbacks are regarded by the PM core as mutually
274PM domain callbacks always take precedence over bus, type and class callbacks, 276exclusive. Moreover, PM domain callbacks always take precedence over all of the
275while type callbacks take precedence over bus and class callbacks, and class 277other callbacks and, for example, type callbacks take precedence over bus, class
276callbacks take precedence over bus callbacks. To be precise, the following 278and driver callbacks. To be precise, the following rules are used to determine
277rules are used to determine which callback to execute in the given phase: 279which callback to execute in the given phase:
278 280
279 1. If dev->pm_domain is present, the PM core will attempt to execute the 281 1. If dev->pm_domain is present, the PM core will choose the callback
280 callback included in dev->pm_domain->ops. If that callback is not 282 included in dev->pm_domain->ops for execution.
281 present, no action will be carried out for the given device.
282 283
283 2. Otherwise, if both dev->type and dev->type->pm are present, the callback 284 2. Otherwise, if both dev->type and dev->type->pm are present, the callback
284 included in dev->type->pm will be executed. 285 included in dev->type->pm will be chosen for execution.
285 286
286 3. Otherwise, if both dev->class and dev->class->pm are present, the 287 3. Otherwise, if both dev->class and dev->class->pm are present, the
287 callback included in dev->class->pm will be executed. 288 callback included in dev->class->pm will be chosen for execution.
288 289
289 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback 290 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback
290 included in dev->bus->pm will be executed. 291 included in dev->bus->pm will be chosen for execution.
291 292
292This allows PM domains and device types to override callbacks provided by bus 293This allows PM domains and device types to override callbacks provided by bus
293types or device classes if necessary. 294types or device classes if necessary.
294 295
295These callbacks may in turn invoke device- or driver-specific methods stored in 296The PM domain, type, class and bus callbacks may in turn invoke device- or
296dev->driver->pm, but they don't have to. 297driver-specific methods stored in dev->driver->pm, but they don't have to do
298that.
299
300If the subsystem callback chosen for execution is not present, the PM core will
301execute the corresponding method from dev->driver->pm instead if there is one.
297 302
298 303
299Entering System Suspend 304Entering System Suspend
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 316c2ba187f4..6ccb68f68da6 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -21,7 +21,7 @@ freeze_processes() (defined in kernel/power/process.c) is called. It executes
21try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and 21try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and
22either wakes them up, if they are kernel threads, or sends fake signals to them, 22either wakes them up, if they are kernel threads, or sends fake signals to them,
23if they are user space processes. A task that has TIF_FREEZE set, should react 23if they are user space processes. A task that has TIF_FREEZE set, should react
24to it by calling the function called refrigerator() (defined in 24to it by calling the function called __refrigerator() (defined in
25kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state 25kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state
26to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it. 26to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it.
27Then, we say that the task is 'frozen' and therefore the set of functions 27Then, we say that the task is 'frozen' and therefore the set of functions
@@ -29,10 +29,10 @@ handling this mechanism is referred to as 'the freezer' (these functions are
29defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). 29defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h).
30User space processes are generally frozen before kernel threads. 30User space processes are generally frozen before kernel threads.
31 31
32It is not recommended to call refrigerator() directly. Instead, it is 32__refrigerator() must not be called directly. Instead, use the
33recommended to use the try_to_freeze() function (defined in 33try_to_freeze() function (defined in include/linux/freezer.h), that checks
34include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the 34the task's TIF_FREEZE flag and makes the task enter __refrigerator() if the
35task enter refrigerator() if the flag is set. 35flag is set.
36 36
37For user space processes try_to_freeze() is called automatically from the 37For user space processes try_to_freeze() is called automatically from the
38signal-handling code, but the freezable kernel threads need to call it 38signal-handling code, but the freezable kernel threads need to call it
@@ -61,13 +61,13 @@ wait_event_freezable() and wait_event_freezable_timeout() macros.
61After the system memory state has been restored from a hibernation image and 61After the system memory state has been restored from a hibernation image and
62devices have been reinitialized, the function thaw_processes() is called in 62devices have been reinitialized, the function thaw_processes() is called in
63order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that 63order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that
64have been frozen leave refrigerator() and continue running. 64have been frozen leave __refrigerator() and continue running.
65 65
66III. Which kernel threads are freezable? 66III. Which kernel threads are freezable?
67 67
68Kernel threads are not freezable by default. However, a kernel thread may clear 68Kernel threads are not freezable by default. However, a kernel thread may clear
69PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE 69PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
70directly is strongly discouraged). From this point it is regarded as freezable 70directly is not allowed). From this point it is regarded as freezable
71and must call try_to_freeze() in a suitable place. 71and must call try_to_freeze() in a suitable place.
72 72
73IV. Why do we do that? 73IV. Why do we do that?
@@ -176,3 +176,28 @@ tasks, since it generally exists anyway.
176A driver must have all firmwares it may need in RAM before suspend() is called. 176A driver must have all firmwares it may need in RAM before suspend() is called.
177If keeping them is not practical, for example due to their size, they must be 177If keeping them is not practical, for example due to their size, they must be
178requested early enough using the suspend notifier API described in notifiers.txt. 178requested early enough using the suspend notifier API described in notifiers.txt.
179
180VI. Are there any precautions to be taken to prevent freezing failures?
181
182Yes, there are.
183
184First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
185from system-wide sleep such as suspend/hibernation is not encouraged.
186If possible, that piece of code must instead hook onto the suspend/hibernation
187notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
188(kernel/cpu.c) for an example.
189
190However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
191it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
192that could lead to freezing failures, because if the suspend/hibernate code
193successfully acquired the 'pm_mutex' lock, and hence that other entity failed
194to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
195state. As a consequence, the freezer would not be able to freeze that task,
196leading to freezing failure.
197
198However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
199since they ask the freezer to skip freezing this task, since it is anyway
200"frozen enough" as it is blocked on 'pm_mutex', which will be released
201only after the entire suspend/hibernation sequence is complete.
202So, to summarize, use [un]lock_system_sleep() instead of directly using
203mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt
index 3f8b528f237e..e272d9909e39 100644
--- a/Documentation/power/regulator/regulator.txt
+++ b/Documentation/power/regulator/regulator.txt
@@ -12,7 +12,7 @@ Drivers can register a regulator by calling :-
12 12
13struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, 13struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
14 struct device *dev, struct regulator_init_data *init_data, 14 struct device *dev, struct regulator_init_data *init_data,
15 void *driver_data); 15 void *driver_data, struct device_node *of_node);
16 16
17This will register the regulator's capabilities and operations to the regulator 17This will register the regulator's capabilities and operations to the regulator
18core. 18core.
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index c2ae8bf77d46..4abe83e1045a 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -57,6 +57,10 @@ the following:
57 57
58 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. 58 4. Bus type of the device, if both dev->bus and dev->bus->pm are present.
59 59
60If the subsystem chosen by applying the above rules doesn't provide the relevant
61callback, the PM core will invoke the corresponding driver callback stored in
62dev->driver->pm directly (if present).
63
60The PM core always checks which callback to use in the order given above, so the 64The PM core always checks which callback to use in the order given above, so the
61priority order of callbacks from high to low is: PM domain, device type, class 65priority order of callbacks from high to low is: PM domain, device type, class
62and bus type. Moreover, the high-priority one will always take precedence over 66and bus type. Moreover, the high-priority one will always take precedence over
@@ -64,86 +68,88 @@ a low-priority one. The PM domain, bus type, device type and class callbacks
64are referred to as subsystem-level callbacks in what follows. 68are referred to as subsystem-level callbacks in what follows.
65 69
66By default, the callbacks are always invoked in process context with interrupts 70By default, the callbacks are always invoked in process context with interrupts
67enabled. However, subsystems can use the pm_runtime_irq_safe() helper function 71enabled. However, the pm_runtime_irq_safe() helper function can be used to tell
68to tell the PM core that their ->runtime_suspend(), ->runtime_resume() and 72the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume()
69->runtime_idle() callbacks may be invoked in atomic context with interrupts 73and ->runtime_idle() callbacks for the given device in atomic context with
70disabled for a given device. This implies that the callback routines in 74interrupts disabled. This implies that the callback routines in question must
71question must not block or sleep, but it also means that the synchronous helper 75not block or sleep, but it also means that the synchronous helper functions
72functions listed at the end of Section 4 may be used for that device within an 76listed at the end of Section 4 may be used for that device within an interrupt
73interrupt handler or generally in an atomic context. 77handler or generally in an atomic context.
74 78
75The subsystem-level suspend callback is _entirely_ _responsible_ for handling 79The subsystem-level suspend callback, if present, is _entirely_ _responsible_
76the suspend of the device as appropriate, which may, but need not include 80for handling the suspend of the device as appropriate, which may, but need not
77executing the device driver's own ->runtime_suspend() callback (from the 81include executing the device driver's own ->runtime_suspend() callback (from the
78PM core's point of view it is not necessary to implement a ->runtime_suspend() 82PM core's point of view it is not necessary to implement a ->runtime_suspend()
79callback in a device driver as long as the subsystem-level suspend callback 83callback in a device driver as long as the subsystem-level suspend callback
80knows what to do to handle the device). 84knows what to do to handle the device).
81 85
82 * Once the subsystem-level suspend callback has completed successfully 86 * Once the subsystem-level suspend callback (or the driver suspend callback,
83 for given device, the PM core regards the device as suspended, which need 87 if invoked directly) has completed successfully for the given device, the PM
84 not mean that the device has been put into a low power state. It is 88 core regards the device as suspended, which need not mean that it has been
85 supposed to mean, however, that the device will not process data and will 89 put into a low power state. It is supposed to mean, however, that the
86 not communicate with the CPU(s) and RAM until the subsystem-level resume 90 device will not process data and will not communicate with the CPU(s) and
87 callback is executed for it. The runtime PM status of a device after 91 RAM until the appropriate resume callback is executed for it. The runtime
88 successful execution of the subsystem-level suspend callback is 'suspended'. 92 PM status of a device after successful execution of the suspend callback is
89 93 'suspended'.
90 * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, 94
91 the device's runtime PM status is 'active', which means that the device 95 * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM
92 _must_ be fully operational afterwards. 96 status remains 'active', which means that the device _must_ be fully
93 97 operational afterwards.
94 * If the subsystem-level suspend callback returns an error code different 98
95 from -EBUSY or -EAGAIN, the PM core regards this as a fatal error and will 99 * If the suspend callback returns an error code different from -EBUSY and
96 refuse to run the helper functions described in Section 4 for the device, 100 -EAGAIN, the PM core regards this as a fatal error and will refuse to run
97 until the status of it is directly set either to 'active', or to 'suspended' 101 the helper functions described in Section 4 for the device until its status
98 (the PM core provides special helper functions for this purpose). 102 is directly set to either'active', or 'suspended' (the PM core provides
99 103 special helper functions for this purpose).
100In particular, if the driver requires remote wake-up capability (i.e. hardware 104
105In particular, if the driver requires remote wakeup capability (i.e. hardware
101mechanism allowing the device to request a change of its power state, such as 106mechanism allowing the device to request a change of its power state, such as
102PCI PME) for proper functioning and device_run_wake() returns 'false' for the 107PCI PME) for proper functioning and device_run_wake() returns 'false' for the
103device, then ->runtime_suspend() should return -EBUSY. On the other hand, if 108device, then ->runtime_suspend() should return -EBUSY. On the other hand, if
104device_run_wake() returns 'true' for the device and the device is put into a low 109device_run_wake() returns 'true' for the device and the device is put into a
105power state during the execution of the subsystem-level suspend callback, it is 110low-power state during the execution of the suspend callback, it is expected
106expected that remote wake-up will be enabled for the device. Generally, remote 111that remote wakeup will be enabled for the device. Generally, remote wakeup
107wake-up should be enabled for all input devices put into a low power state at 112should be enabled for all input devices put into low-power states at run time.
108run time. 113
109 114The subsystem-level resume callback, if present, is _entirely_ _responsible_ for
110The subsystem-level resume callback is _entirely_ _responsible_ for handling the 115handling the resume of the device as appropriate, which may, but need not
111resume of the device as appropriate, which may, but need not include executing 116include executing the device driver's own ->runtime_resume() callback (from the
112the device driver's own ->runtime_resume() callback (from the PM core's point of 117PM core's point of view it is not necessary to implement a ->runtime_resume()
113view it is not necessary to implement a ->runtime_resume() callback in a device 118callback in a device driver as long as the subsystem-level resume callback knows
114driver as long as the subsystem-level resume callback knows what to do to handle 119what to do to handle the device).
115the device). 120
116 121 * Once the subsystem-level resume callback (or the driver resume callback, if
117 * Once the subsystem-level resume callback has completed successfully, the PM 122 invoked directly) has completed successfully, the PM core regards the device
118 core regards the device as fully operational, which means that the device 123 as fully operational, which means that the device _must_ be able to complete
119 _must_ be able to complete I/O operations as needed. The runtime PM status 124 I/O operations as needed. The runtime PM status of the device is then
120 of the device is then 'active'. 125 'active'.
121 126
122 * If the subsystem-level resume callback returns an error code, the PM core 127 * If the resume callback returns an error code, the PM core regards this as a
123 regards this as a fatal error and will refuse to run the helper functions 128 fatal error and will refuse to run the helper functions described in Section
124 described in Section 4 for the device, until its status is directly set 129 4 for the device, until its status is directly set to either 'active', or
125 either to 'active' or to 'suspended' (the PM core provides special helper 130 'suspended' (by means of special helper functions provided by the PM core
126 functions for this purpose). 131 for this purpose).
127 132
128The subsystem-level idle callback is executed by the PM core whenever the device 133The idle callback (a subsystem-level one, if present, or the driver one) is
129appears to be idle, which is indicated to the PM core by two counters, the 134executed by the PM core whenever the device appears to be idle, which is
130device's usage counter and the counter of 'active' children of the device. 135indicated to the PM core by two counters, the device's usage counter and the
136counter of 'active' children of the device.
131 137
132 * If any of these counters is decreased using a helper function provided by 138 * If any of these counters is decreased using a helper function provided by
133 the PM core and it turns out to be equal to zero, the other counter is 139 the PM core and it turns out to be equal to zero, the other counter is
134 checked. If that counter also is equal to zero, the PM core executes the 140 checked. If that counter also is equal to zero, the PM core executes the
135 subsystem-level idle callback with the device as an argument. 141 idle callback with the device as its argument.
136 142
137The action performed by a subsystem-level idle callback is totally dependent on 143The action performed by the idle callback is totally dependent on the subsystem
138the subsystem in question, but the expected and recommended action is to check 144(or driver) in question, but the expected and recommended action is to check
139if the device can be suspended (i.e. if all of the conditions necessary for 145if the device can be suspended (i.e. if all of the conditions necessary for
140suspending the device are satisfied) and to queue up a suspend request for the 146suspending the device are satisfied) and to queue up a suspend request for the
141device in that case. The value returned by this callback is ignored by the PM 147device in that case. The value returned by this callback is ignored by the PM
142core. 148core.
143 149
144The helper functions provided by the PM core, described in Section 4, guarantee 150The helper functions provided by the PM core, described in Section 4, guarantee
145that the following constraints are met with respect to the bus type's runtime 151that the following constraints are met with respect to runtime PM callbacks for
146PM callbacks: 152one device:
147 153
148(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute 154(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
149 ->runtime_suspend() in parallel with ->runtime_resume() or with another 155 ->runtime_suspend() in parallel with ->runtime_resume() or with another
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
index efe998becc5b..462321c1aeea 100644
--- a/Documentation/s390/Debugging390.txt
+++ b/Documentation/s390/Debugging390.txt
@@ -41,7 +41,6 @@ ldd
41Debugging modules 41Debugging modules
42The proc file system 42The proc file system
43Starting points for debugging scripting languages etc. 43Starting points for debugging scripting languages etc.
44Dumptool & Lcrash
45SysRq 44SysRq
46References 45References
47Special Thanks 46Special Thanks
@@ -2455,39 +2454,6 @@ jdb <filename> another fully interactive gdb style debugger.
2455 2454
2456 2455
2457 2456
2458Dumptool & Lcrash ( lkcd )
2459==========================
2460Michael Holzheu & others here at IBM have a fairly mature port of
2461SGI's lcrash tool which allows one to look at kernel structures in a
2462running kernel.
2463
2464It also complements a tool called dumptool which dumps all the kernel's
2465memory pages & registers to either a tape or a disk.
2466This can be used by tech support or an ambitious end user do
2467post mortem debugging of a machine like gdb core dumps.
2468
2469Going into how to use this tool in detail will be explained
2470in other documentation supplied by IBM with the patches & the
2471lcrash homepage http://oss.sgi.com/projects/lkcd/ & the lcrash manpage.
2472
2473How they work
2474-------------
2475Lcrash is a perfectly normal program,however, it requires 2
2476additional files, Kerntypes which is built using a patch to the
2477linux kernel sources in the linux root directory & the System.map.
2478
2479Kerntypes is an objectfile whose sole purpose in life
2480is to provide stabs debug info to lcrash, to do this
2481Kerntypes is built from kerntypes.c which just includes the most commonly
2482referenced header files used when debugging, lcrash can then read the
2483.stabs section of this file.
2484
2485Debugging a live system it uses /dev/mem
2486alternatively for post mortem debugging it uses the data
2487collected by dumptool.
2488
2489
2490
2491SysRq 2457SysRq
2492===== 2458=====
2493This is now supported by linux for s/390 & z/Architecture. 2459This is now supported by linux for s/390 & z/Architecture.
diff --git a/Documentation/scsi/53c700.txt b/Documentation/scsi/53c700.txt
index 0da681d497a2..e31aceb6df15 100644
--- a/Documentation/scsi/53c700.txt
+++ b/Documentation/scsi/53c700.txt
@@ -16,32 +16,13 @@ fill in to get the driver working.
16Compile Time Flags 16Compile Time Flags
17================== 17==================
18 18
19The driver may be either io mapped or memory mapped. This is 19A compile time flag is:
20selectable by configuration flags:
21
22CONFIG_53C700_MEM_MAPPED
23
24define if the driver is memory mapped.
25
26CONFIG_53C700_IO_MAPPED
27
28define if the driver is to be io mapped.
29
30One or other of the above flags *must* be defined.
31
32Other flags are:
33 20
34CONFIG_53C700_LE_ON_BE 21CONFIG_53C700_LE_ON_BE
35 22
36define if the chipset must be supported in little endian mode on a big 23define if the chipset must be supported in little endian mode on a big
37endian architecture (used for the 700 on parisc). 24endian architecture (used for the 700 on parisc).
38 25
39CONFIG_53C700_USE_CONSISTENT
40
41allocate consistent memory (should only be used if your architecture
42has a mixture of consistent and inconsistent memory). Fully
43consistent or fully inconsistent architectures should not define this.
44
45 26
46Using the Chip Core Driver 27Using the Chip Core Driver
47========================== 28==========================
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
index 19bc49439cac..99b85d39751c 100644
--- a/Documentation/security/00-INDEX
+++ b/Documentation/security/00-INDEX
@@ -1,5 +1,7 @@
100-INDEX 100-INDEX
2 - this file. 2 - this file.
3LSM.txt
4 - description of the Linux Security Module framework.
3SELinux.txt 5SELinux.txt
4 - how to get started with the SELinux security enhancement. 6 - how to get started with the SELinux security enhancement.
5Smack.txt 7Smack.txt
diff --git a/Documentation/security/LSM.txt b/Documentation/security/LSM.txt
new file mode 100644
index 000000000000..c335a763a2ed
--- /dev/null
+++ b/Documentation/security/LSM.txt
@@ -0,0 +1,34 @@
1Linux Security Module framework
2-------------------------------
3
4The Linux Security Module (LSM) framework provides a mechanism for
5various security checks to be hooked by new kernel extensions. The name
6"module" is a bit of a misnomer since these extensions are not actually
7loadable kernel modules. Instead, they are selectable at build-time via
8CONFIG_DEFAULT_SECURITY and can be overridden at boot-time via the
9"security=..." kernel command line argument, in the case where multiple
10LSMs were built into a given kernel.
11
12The primary users of the LSM interface are Mandatory Access Control
13(MAC) extensions which provide a comprehensive security policy. Examples
14include SELinux, Smack, Tomoyo, and AppArmor. In addition to the larger
15MAC extensions, other extensions can be built using the LSM to provide
16specific changes to system operation when these tweaks are not available
17in the core functionality of Linux itself.
18
19Without a specific LSM built into the kernel, the default LSM will be the
20Linux capabilities system. Most LSMs choose to extend the capabilities
21system, building their checks on top of the defined capability hooks.
22For more details on capabilities, see capabilities(7) in the Linux
23man-pages project.
24
25Based on http://kerneltrap.org/Linux/Documenting_Security_Module_Intent,
26a new LSM is accepted into the kernel when its intent (a description of
27what it tries to protect against and in what cases one would expect to
28use it) has been appropriately documented in Documentation/security/.
29This allows an LSM's code to be easily compared to its goals, and so
30that end users and distros can make a more informed decision about which
31LSMs suit their requirements.
32
33For extensive documentation on the available LSM hook interfaces, please
34see include/linux/security.h.
diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt
index fc0366cbd7ce..86257052e31a 100644
--- a/Documentation/security/credentials.txt
+++ b/Documentation/security/credentials.txt
@@ -221,10 +221,10 @@ The Linux kernel supports the following types of credentials:
221 (5) LSM 221 (5) LSM
222 222
223 The Linux Security Module allows extra controls to be placed over the 223 The Linux Security Module allows extra controls to be placed over the
224 operations that a task may do. Currently Linux supports two main 224 operations that a task may do. Currently Linux supports several LSM
225 alternate LSM options: SELinux and Smack. 225 options.
226 226
227 Both work by labelling the objects in a system and then applying sets of 227 Some work by labelling the objects in a system and then applying sets of
228 rules (policies) that say what operations a task with one label may do to 228 rules (policies) that say what operations a task with one label may do to
229 an object with another label. 229 an object with another label.
230 230
diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index 77ba0afbe4db..0a25a9191864 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -101,7 +101,7 @@ hardware.
101 Returns the current state of modem control inputs. The state 101 Returns the current state of modem control inputs. The state
102 of the outputs should not be returned, since the core keeps 102 of the outputs should not be returned, since the core keeps
103 track of their state. The state information should include: 103 track of their state. The state information should include:
104 - TIOCM_DCD state of DCD signal 104 - TIOCM_CAR state of DCD signal
105 - TIOCM_CTS state of CTS signal 105 - TIOCM_CTS state of CTS signal
106 - TIOCM_DSR state of DSR signal 106 - TIOCM_DSR state of DSR signal
107 - TIOCM_RI state of RI signal 107 - TIOCM_RI state of RI signal
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index edad99abec21..c8c54544abc5 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -42,19 +42,7 @@ ALC260
42 42
43ALC262 43ALC262
44====== 44======
45 fujitsu Fujitsu Laptop 45 N/A
46 benq Benq ED8
47 benq-t31 Benq T31
48 hippo Hippo (ATI) with jack detection, Sony UX-90s
49 hippo_1 Hippo (Benq) with jack detection
50 toshiba-s06 Toshiba S06
51 toshiba-rx1 Toshiba RX1
52 tyan Tyan Thunder n6650W (S2915-E)
53 ultra Samsung Q1 Ultra Vista model
54 lenovo-3000 Lenovo 3000 y410
55 nec NEC Versa S9100
56 basic fixed pin assignment w/o SPDIF
57 auto auto-config reading BIOS (default)
58 46
59ALC267/268 47ALC267/268
60========== 48==========
@@ -350,7 +338,6 @@ STAC92HD83*
350 mic-ref Reference board with power management for ports 338 mic-ref Reference board with power management for ports
351 dell-s14 Dell laptop 339 dell-s14 Dell laptop
352 dell-vostro-3500 Dell Vostro 3500 laptop 340 dell-vostro-3500 Dell Vostro 3500 laptop
353 hp HP laptops with (inverted) mute-LED
354 hp-dv7-4000 HP dv-7 4000 341 hp-dv7-4000 HP dv-7 4000
355 auto BIOS setup (default) 342 auto BIOS setup (default)
356 343
diff --git a/Documentation/sound/alsa/compress_offload.txt b/Documentation/sound/alsa/compress_offload.txt
new file mode 100644
index 000000000000..c83a835350f0
--- /dev/null
+++ b/Documentation/sound/alsa/compress_offload.txt
@@ -0,0 +1,188 @@
1 compress_offload.txt
2 =====================
3 Pierre-Louis.Bossart <pierre-louis.bossart@linux.intel.com>
4 Vinod Koul <vinod.koul@linux.intel.com>
5
6Overview
7
8Since its early days, the ALSA API was defined with PCM support or
9constant bitrate payloads such as IEC61937 in mind. Arguments and
10returned values in frames are the norm, making it a challenge to
11extend the existing API to compressed data streams.
12
13In recent years, audio digital signal processors (DSP) were integrated
14in system-on-chip designs, and DSPs are also integrated in audio
15codecs. Processing compressed data on such DSPs results in a dramatic
16reduction of power consumption compared to host-based
17processing. Support for such hardware has not been very good in Linux,
18mostly because of a lack of a generic API available in the mainline
19kernel.
20
21Rather than requiring a compatibility break with an API change of the
22ALSA PCM interface, a new 'Compressed Data' API is introduced to
23provide a control and data-streaming interface for audio DSPs.
24
25The design of this API was inspired by the 2-year experience with the
26Intel Moorestown SOC, with many corrections required to upstream the
27API in the mainline kernel instead of the staging tree and make it
28usable by others.
29
30Requirements
31
32The main requirements are:
33
34- separation between byte counts and time. Compressed formats may have
35 a header per file, per frame, or no header at all. The payload size
36 may vary from frame-to-frame. As a result, it is not possible to
37 estimate reliably the duration of audio buffers when handling
38 compressed data. Dedicated mechanisms are required to allow for
39 reliable audio-video synchronization, which requires precise
40 reporting of the number of samples rendered at any given time.
41
42- Handling of multiple formats. PCM data only requires a specification
43 of the sampling rate, number of channels and bits per sample. In
44 contrast, compressed data comes in a variety of formats. Audio DSPs
45 may also provide support for a limited number of audio encoders and
46 decoders embedded in firmware, or may support more choices through
47 dynamic download of libraries.
48
49- Focus on main formats. This API provides support for the most
50 popular formats used for audio and video capture and playback. It is
51 likely that as audio compression technology advances, new formats
52 will be added.
53
54- Handling of multiple configurations. Even for a given format like
55 AAC, some implementations may support AAC multichannel but HE-AAC
56 stereo. Likewise WMA10 level M3 may require too much memory and cpu
57 cycles. The new API needs to provide a generic way of listing these
58 formats.
59
60- Rendering/Grabbing only. This API does not provide any means of
61 hardware acceleration, where PCM samples are provided back to
62 user-space for additional processing. This API focuses instead on
63 streaming compressed data to a DSP, with the assumption that the
64 decoded samples are routed to a physical output or logical back-end.
65
66 - Complexity hiding. Existing user-space multimedia frameworks all
67 have existing enums/structures for each compressed format. This new
68 API assumes the existence of a platform-specific compatibility layer
69 to expose, translate and make use of the capabilities of the audio
70 DSP, e.g. Android HAL or PulseAudio sinks. By construction, regular
71 applications are not supposed to make use of this API.
72
73
74Design
75
76The new API shares a number of concepts with the PCM API for flow
77control. Start, pause, resume, drain and stop commands have the same
78semantics no matter what the content is.
79
80The concept of memory ring buffer divided in a set of fragments is
81borrowed from the ALSA PCM API. However, only sizes in bytes can be
82specified.
83
84Seeks/trick modes are assumed to be handled by the host.
85
86The notion of rewinds/forwards is not supported. Data committed to the
87ring buffer cannot be invalidated, except when dropping all buffers.
88
89The Compressed Data API does not make any assumptions on how the data
90is transmitted to the audio DSP. DMA transfers from main memory to an
91embedded audio cluster or to a SPI interface for external DSPs are
92possible. As in the ALSA PCM case, a core set of routines is exposed;
93each driver implementer will have to write support for a set of
94mandatory routines and possibly make use of optional ones.
95
96The main additions are
97
98- get_caps
99This routine returns the list of audio formats supported. Querying the
100codecs on a capture stream will return encoders, decoders will be
101listed for playback streams.
102
103- get_codec_caps For each codec, this routine returns a list of
104capabilities. The intent is to make sure all the capabilities
105correspond to valid settings, and to minimize the risks of
106configuration failures. For example, for a complex codec such as AAC,
107the number of channels supported may depend on a specific profile. If
108the capabilities were exposed with a single descriptor, it may happen
109that a specific combination of profiles/channels/formats may not be
110supported. Likewise, embedded DSPs have limited memory and cpu cycles,
111it is likely that some implementations make the list of capabilities
112dynamic and dependent on existing workloads. In addition to codec
113settings, this routine returns the minimum buffer size handled by the
114implementation. This information can be a function of the DMA buffer
115sizes, the number of bytes required to synchronize, etc, and can be
116used by userspace to define how much needs to be written in the ring
117buffer before playback can start.
118
119- set_params
120This routine sets the configuration chosen for a specific codec. The
121most important field in the parameters is the codec type; in most
122cases decoders will ignore other fields, while encoders will strictly
123comply with the settings.
124
125- get_params
126This routine returns the actual settings used by the DSP. Changes to
127the settings should remain the exception.
128
129- get_timestamp
130The timestamp becomes a multiple field structure. It lists the number
131of bytes transferred, the number of samples processed and the number
132of samples rendered/grabbed. All these values can be used to determine
133the average bitrate, figure out if the ring buffer needs to be
134refilled or the delay due to decoding/encoding/io on the DSP.
135
136Note that the list of codecs/profiles/modes was derived from the
137OpenMAX AL specification instead of reinventing the wheel.
138Modifications include:
139- Addition of FLAC and IEC formats
140- Merge of encoder/decoder capabilities
141- Profiles/modes listed as bitmasks to make descriptors more compact
142- Addition of set_params for decoders (missing in OpenMAX AL)
143- Addition of AMR/AMR-WB encoding modes (missing in OpenMAX AL)
144- Addition of format information for WMA
145- Addition of encoding options when required (derived from OpenMAX IL)
146- Addition of rateControlSupported (missing in OpenMAX AL)
147
148Not supported:
149
150- Support for VoIP/circuit-switched calls is not the target of this
151 API. Support for dynamic bit-rate changes would require a tight
152 coupling between the DSP and the host stack, limiting power savings.
153
154- Packet-loss concealment is not supported. This would require an
155 additional interface to let the decoder synthesize data when frames
156 are lost during transmission. This may be added in the future.
157
158- Volume control/routing is not handled by this API. Devices exposing a
159 compressed data interface will be considered as regular ALSA devices;
160 volume changes and routing information will be provided with regular
161 ALSA kcontrols.
162
163- Embedded audio effects. Such effects should be enabled in the same
164 manner, no matter if the input was PCM or compressed.
165
166- multichannel IEC encoding. Unclear if this is required.
167
168- Encoding/decoding acceleration is not supported as mentioned
169 above. It is possible to route the output of a decoder to a capture
170 stream, or even implement transcoding capabilities. This routing
171 would be enabled with ALSA kcontrols.
172
173- Audio policy/resource management. This API does not provide any
174 hooks to query the utilization of the audio DSP, nor any preemption
175 mechanisms.
176
177- No notion of underrun/overrun. Since the bytes written are compressed
178 in nature and data written/read doesn't translate directly to
179 rendered output in time, this API does not deal with underrun/overrun;
180 they may be handled in a user-space library.
181
182Credits:
183- Mark Brown and Liam Girdwood for discussions on the need for this API
184- Harsha Priya for her work on intel_sst compressed API
185- Rakesh Ughreja for valuable feedback
186- Sing Nallasellan, Sikkandar Madar and Prasanna Samaga for
187 demonstrating and quantifying the benefits of audio offload on a
188 real platform.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 1f2463671a1a..8c20fbd8b42d 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -49,6 +49,7 @@ show up in /proc/sys/kernel:
49- panic 49- panic
50- panic_on_oops 50- panic_on_oops
51- panic_on_unrecovered_nmi 51- panic_on_unrecovered_nmi
52- panic_on_stackoverflow
52- pid_max 53- pid_max
53- powersave-nap [ PPC only ] 54- powersave-nap [ PPC only ]
54- printk 55- printk
@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
393 394
394============================================================== 395==============================================================
395 396
397panic_on_stackoverflow:
398
399Controls the kernel's behavior when it detects an overflow of the
400kernel, IRQ or exception stacks (user stacks are not covered).
401This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
402
4030: try to continue operation.
404
4051: panic immediately.
406
407==============================================================
408
409
396pid_max: 410pid_max:
397 411
398PID allocation wrap value. When the kernel's next PID value 412PID allocation wrap value. When the kernel's next PID value
@@ -401,6 +415,14 @@ PIDs of value pid_max or larger are not allocated.
401 415
402============================================================== 416==============================================================
403 417
418ns_last_pid:
419
420The last pid allocated in the current pid namespace (the one the task
421using this sysctl lives in). When selecting a pid for the next task on
422fork, the kernel tries to allocate a number starting from this one.
423
424==============================================================
425
404powersave-nap: (PPC only) 426powersave-nap: (PPC only)
405 427
406If set, Linux-PPC will use the 'nap' mode of powersaving, 428If set, Linux-PPC will use the 'nap' mode of powersaving,
diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.txt
index aa82ee4a5a87..194800410061 100644
--- a/Documentation/trace/events-kmem.txt
+++ b/Documentation/trace/events-kmem.txt
@@ -40,8 +40,8 @@ but the call_site can usually be used to extrapolate that information.
40================== 40==================
41mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s 41mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s
42mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d 42mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
43mm_page_free_direct page=%p pfn=%lu order=%d 43mm_page_free page=%p pfn=%lu order=%d
44mm_pagevec_free page=%p pfn=%lu order=%d cold=%d 44mm_page_free_batched page=%p pfn=%lu order=%d cold=%d
45 45
46These four events deal with page allocation and freeing. mm_page_alloc is 46These four events deal with page allocation and freeing. mm_page_alloc is
47a simple indicator of page allocator activity. Pages may be allocated from 47a simple indicator of page allocator activity. Pages may be allocated from
@@ -53,13 +53,13 @@ amounts of activity imply high activity on the zone->lock. Taking this lock
53impairs performance by disabling interrupts, dirtying cache lines between 53impairs performance by disabling interrupts, dirtying cache lines between
54CPUs and serialising many CPUs. 54CPUs and serialising many CPUs.
55 55
56When a page is freed directly by the caller, the mm_page_free_direct event 56When a page is freed directly by the caller, only the mm_page_free event
57is triggered. Significant amounts of activity here could indicate that the 57is triggered. Significant amounts of activity here could indicate that the
58callers should be batching their activities. 58callers should be batching their activities.
59 59
60When pages are freed using a pagevec, the mm_pagevec_free is 60When pages are freed in batch, the mm_page_free_batched event is also triggered.
61triggered. Broadly speaking, pages are taken off the LRU lock in bulk and 61Broadly speaking, pages are taken off the LRU lock in bulk and
62freed in batch with a pagevec. Significant amounts of activity here could 62freed in batch with a page list. Significant amounts of activity here could
63indicate that the system is under memory pressure and can also indicate 63indicate that the system is under memory pressure and can also indicate
64contention on the zone->lru_lock. 64contention on the zone->lru_lock.
65 65
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index b510564aac7e..bb24c2a0e870 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -191,8 +191,6 @@ And for string fields they are:
191 191
192Currently, only exact string matches are supported. 192Currently, only exact string matches are supported.
193 193
194Currently, the maximum number of predicates in a filter is 16.
195
1965.2 Setting filters 1945.2 Setting filters
197------------------- 195-------------------
198 196
diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
index 7df50e8cf4d9..0a120aae33ce 100644
--- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
@@ -17,8 +17,8 @@ use Getopt::Long;
17 17
18# Tracepoint events 18# Tracepoint events
19use constant MM_PAGE_ALLOC => 1; 19use constant MM_PAGE_ALLOC => 1;
20use constant MM_PAGE_FREE_DIRECT => 2; 20use constant MM_PAGE_FREE => 2;
21use constant MM_PAGEVEC_FREE => 3; 21use constant MM_PAGE_FREE_BATCHED => 3;
22use constant MM_PAGE_PCPU_DRAIN => 4; 22use constant MM_PAGE_PCPU_DRAIN => 4;
23use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; 23use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5;
24use constant MM_PAGE_ALLOC_EXTFRAG => 6; 24use constant MM_PAGE_ALLOC_EXTFRAG => 6;
@@ -223,10 +223,10 @@ EVENT_PROCESS:
223 # Perl Switch() sucks majorly 223 # Perl Switch() sucks majorly
224 if ($tracepoint eq "mm_page_alloc") { 224 if ($tracepoint eq "mm_page_alloc") {
225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; 225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
226 } elsif ($tracepoint eq "mm_page_free_direct") { 226 } elsif ($tracepoint eq "mm_page_free") {
227 $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++; 227 $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
228 } elsif ($tracepoint eq "mm_pagevec_free") { 228 } elsif ($tracepoint eq "mm_page_free_batched") {
229 $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++; 229 $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
230 } elsif ($tracepoint eq "mm_page_pcpu_drain") { 230 } elsif ($tracepoint eq "mm_page_pcpu_drain") {
231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; 231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; 232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
@@ -336,8 +336,8 @@ sub dump_stats {
336 $process_pid, 336 $process_pid,
337 $stats{$process_pid}->{MM_PAGE_ALLOC}, 337 $stats{$process_pid}->{MM_PAGE_ALLOC},
338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, 338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
339 $stats{$process_pid}->{MM_PAGE_FREE_DIRECT}, 339 $stats{$process_pid}->{MM_PAGE_FREE},
340 $stats{$process_pid}->{MM_PAGEVEC_FREE}, 340 $stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, 341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
342 $stats{$process_pid}->{HIGH_PCPU_DRAINS}, 342 $stats{$process_pid}->{HIGH_PCPU_DRAINS},
343 $stats{$process_pid}->{HIGH_PCPU_REFILLS}, 343 $stats{$process_pid}->{HIGH_PCPU_REFILLS},
@@ -364,8 +364,8 @@ sub aggregate_perprocesspid() {
364 364
365 $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; 365 $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; 366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
367 $perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}; 367 $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE};
368 $perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}; 368 $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED};
369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; 369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
370 $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; 370 $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
371 $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; 371 $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt
index 87bee3c129ba..058cc6c9dc56 100644
--- a/Documentation/trace/tracepoint-analysis.txt
+++ b/Documentation/trace/tracepoint-analysis.txt
@@ -93,14 +93,14 @@ By specifying the -a switch and analysing sleep, the system-wide events
93for a duration of time can be examined. 93for a duration of time can be examined.
94 94
95 $ perf stat -a \ 95 $ perf stat -a \
96 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ 96 -e kmem:mm_page_alloc -e kmem:mm_page_free \
97 -e kmem:mm_pagevec_free \ 97 -e kmem:mm_page_free_batched \
98 sleep 10 98 sleep 10
99 Performance counter stats for 'sleep 10': 99 Performance counter stats for 'sleep 10':
100 100
101 9630 kmem:mm_page_alloc 101 9630 kmem:mm_page_alloc
102 2143 kmem:mm_page_free_direct 102 2143 kmem:mm_page_free
103 7424 kmem:mm_pagevec_free 103 7424 kmem:mm_page_free_batched
104 104
105 10.002577764 seconds time elapsed 105 10.002577764 seconds time elapsed
106 106
@@ -119,15 +119,15 @@ basis using set_ftrace_pid.
119Events can be activated and tracked for the duration of a process on a local 119Events can be activated and tracked for the duration of a process on a local
120basis using PCL such as follows. 120basis using PCL such as follows.
121 121
122 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ 122 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \
123 -e kmem:mm_pagevec_free ./hackbench 10 123 -e kmem:mm_page_free_batched ./hackbench 10
124 Time: 0.909 124 Time: 0.909
125 125
126 Performance counter stats for './hackbench 10': 126 Performance counter stats for './hackbench 10':
127 127
128 17803 kmem:mm_page_alloc 128 17803 kmem:mm_page_alloc
129 12398 kmem:mm_page_free_direct 129 12398 kmem:mm_page_free
130 4827 kmem:mm_pagevec_free 130 4827 kmem:mm_page_free_batched
131 131
132 0.973913387 seconds time elapsed 132 0.973913387 seconds time elapsed
133 133
@@ -146,8 +146,8 @@ to know what the standard deviation is. By and large, this is left to the
146performance analyst to do it by hand. In the event that the discrete event 146performance analyst to do it by hand. In the event that the discrete event
147occurrences are useful to the performance analyst, then perf can be used. 147occurrences are useful to the performance analyst, then perf can be used.
148 148
149 $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct 149 $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free
150 -e kmem:mm_pagevec_free ./hackbench 10 150 -e kmem:mm_page_free_batched ./hackbench 10
151 Time: 0.890 151 Time: 0.890
152 Time: 0.895 152 Time: 0.895
153 Time: 0.915 153 Time: 0.915
@@ -157,8 +157,8 @@ occurrences are useful to the performance analyst, then perf can be used.
157 Performance counter stats for './hackbench 10' (5 runs): 157 Performance counter stats for './hackbench 10' (5 runs):
158 158
159 16630 kmem:mm_page_alloc ( +- 3.542% ) 159 16630 kmem:mm_page_alloc ( +- 3.542% )
160 11486 kmem:mm_page_free_direct ( +- 4.771% ) 160 11486 kmem:mm_page_free ( +- 4.771% )
161 4730 kmem:mm_pagevec_free ( +- 2.325% ) 161 4730 kmem:mm_page_free_batched ( +- 2.325% )
162 162
163 0.982653002 seconds time elapsed ( +- 1.448% ) 163 0.982653002 seconds time elapsed ( +- 1.448% )
164 164
@@ -168,15 +168,15 @@ aggregation of discrete events, then a script would need to be developed.
168Using --repeat, it is also possible to view how events are fluctuating over 168Using --repeat, it is also possible to view how events are fluctuating over
169time on a system-wide basis using -a and sleep. 169time on a system-wide basis using -a and sleep.
170 170
171 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ 171 $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \
172 -e kmem:mm_pagevec_free \ 172 -e kmem:mm_page_free_batched \
173 -a --repeat 10 \ 173 -a --repeat 10 \
174 sleep 1 174 sleep 1
175 Performance counter stats for 'sleep 1' (10 runs): 175 Performance counter stats for 'sleep 1' (10 runs):
176 176
177 1066 kmem:mm_page_alloc ( +- 26.148% ) 177 1066 kmem:mm_page_alloc ( +- 26.148% )
178 182 kmem:mm_page_free_direct ( +- 5.464% ) 178 182 kmem:mm_page_free ( +- 5.464% )
179 890 kmem:mm_pagevec_free ( +- 30.079% ) 179 890 kmem:mm_page_free_batched ( +- 30.079% )
180 180
181 1.002251757 seconds time elapsed ( +- 0.005% ) 181 1.002251757 seconds time elapsed ( +- 0.005% )
182 182
@@ -220,8 +220,8 @@ were generating events within the kernel. To begin this sort of analysis, the
220data must be recorded. At the time of writing, this required root: 220data must be recorded. At the time of writing, this required root:
221 221
222 $ perf record -c 1 \ 222 $ perf record -c 1 \
223 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ 223 -e kmem:mm_page_alloc -e kmem:mm_page_free \
224 -e kmem:mm_pagevec_free \ 224 -e kmem:mm_page_free_batched \
225 ./hackbench 10 225 ./hackbench 10
226 Time: 0.894 226 Time: 0.894
227 [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ] 227 [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ]
@@ -260,8 +260,8 @@ noticed that X was generating an insane amount of page allocations so let's look
260at it: 260at it:
261 261
262 $ perf record -c 1 -f \ 262 $ perf record -c 1 -f \
263 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ 263 -e kmem:mm_page_alloc -e kmem:mm_page_free \
264 -e kmem:mm_pagevec_free \ 264 -e kmem:mm_page_free_batched \
265 -p `pidof X` 265 -p `pidof X`
266 266
267This was interrupted after a few seconds and 267This was interrupted after a few seconds and
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
index a4efa0462f05..5335fa8b06eb 100644
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -47,10 +47,11 @@ This allows to filter away annoying devices that talk continuously.
47 47
482. Find which bus connects to the desired device 482. Find which bus connects to the desired device
49 49
50Run "cat /proc/bus/usb/devices", and find the T-line which corresponds to 50Run "cat /sys/kernel/debug/usb/devices", and find the T-line which corresponds
51the device. Usually you do it by looking for the vendor string. If you have 51to the device. Usually you do it by looking for the vendor string. If you have
52many similar devices, unplug one and compare two /proc/bus/usb/devices outputs. 52many similar devices, unplug one and compare the two
53The T-line will have a bus number. Example: 53/sys/kernel/debug/usb/devices outputs. The T-line will have a bus number.
54Example:
54 55
55T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 56T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0
56D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 57D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
@@ -58,7 +59,10 @@ P: Vendor=0557 ProdID=2004 Rev= 1.00
58S: Manufacturer=ATEN 59S: Manufacturer=ATEN
59S: Product=UC100KM V2.00 60S: Product=UC100KM V2.00
60 61
61Bus=03 means it's bus 3. 62"Bus=03" means it's bus 3. Alternatively, you can look at the output from
63"lsusb" and get the bus number from the appropriate line. Example:
64
65Bus 003 Device 002: ID 0557:2004 ATEN UC100KM V2.00
62 66
633. Start 'cat' 673. Start 'cat'
64 68
diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt
index b7d401e0eae9..014423e2824c 100644
--- a/Documentation/vgaarbiter.txt
+++ b/Documentation/vgaarbiter.txt
@@ -177,7 +177,7 @@ II. Credits
177 177
178Benjamin Herrenschmidt (IBM?) started this work when he discussed such design 178Benjamin Herrenschmidt (IBM?) started this work when he discussed such design
179with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and 179with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and
180Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work 180Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work
181enhancing the kernel code to adapt as a kernel module and also did the 181enhancing the kernel code to adapt as a kernel module and also did the
182implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave 182implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave
183Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree. 183Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e2a4b5287361..e1d94bf4056e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1466,6 +1466,31 @@ is supported; 2 if the processor requires all virtual machines to have
1466an RMA, or 1 if the processor can use an RMA but doesn't require it, 1466an RMA, or 1 if the processor can use an RMA but doesn't require it,
1467because it supports the Virtual RMA (VRMA) facility. 1467because it supports the Virtual RMA (VRMA) facility.
1468 1468
14694.64 KVM_NMI
1470
1471Capability: KVM_CAP_USER_NMI
1472Architectures: x86
1473Type: vcpu ioctl
1474Parameters: none
1475Returns: 0 on success, -1 on error
1476
1477Queues an NMI on the thread's vcpu. Note this is well defined only
1478when KVM_CREATE_IRQCHIP has not been called, since this is an interface
1479between the virtual cpu core and virtual local APIC. After KVM_CREATE_IRQCHIP
1480has been called, this interface is completely emulated within the kernel.
1481
1482To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the
1483following algorithm:
1484
1485 - pause the vcpu
1486 - read the local APIC's state (KVM_GET_LAPIC)
1487 - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1)
1488 - if so, issue KVM_NMI
1489 - resume the vcpu
1490
1491Some guests configure the LINT1 NMI input to cause a panic, aiding in
1492debugging.
1493
14695. The kvm_run structure 14945. The kvm_run structure
1470 1495
1471Application code obtains a pointer to the kvm_run structure by 1496Application code obtains a pointer to the kvm_run structure by
diff --git a/Documentation/virtual/lguest/.gitignore b/Documentation/virtual/lguest/.gitignore
deleted file mode 100644
index 115587fd5f65..000000000000
--- a/Documentation/virtual/lguest/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
1lguest
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile
deleted file mode 100644
index 0ac34206f7a7..000000000000
--- a/Documentation/virtual/lguest/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1# This creates the demonstration utility "lguest" which runs a Linux guest.
2# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
3CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
4
5all: lguest
6
7clean:
8 rm -f lguest
diff --git a/Documentation/virtual/lguest/extract b/Documentation/virtual/lguest/extract
deleted file mode 100644
index 7730bb6e4b94..000000000000
--- a/Documentation/virtual/lguest/extract
+++ /dev/null
@@ -1,58 +0,0 @@
1#! /bin/sh
2
3set -e
4
5PREFIX=$1
6shift
7
8trap 'rm -r $TMPDIR' 0
9TMPDIR=`mktemp -d`
10
11exec 3>/dev/null
12for f; do
13 while IFS="
14" read -r LINE; do
15 case "$LINE" in
16 *$PREFIX:[0-9]*:\**)
17 NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
18 if [ -f $TMPDIR/$NUM ]; then
19 echo "$TMPDIR/$NUM already exits prior to $f"
20 exit 1
21 fi
22 exec 3>>$TMPDIR/$NUM
23 echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
24 /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
25 ;;
26 *$PREFIX:[0-9]*)
27 NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
28 if [ -f $TMPDIR/$NUM ]; then
29 echo "$TMPDIR/$NUM already exits prior to $f"
30 exit 1
31 fi
32 exec 3>>$TMPDIR/$NUM
33 echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
34 /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
35 ;;
36 *:\**)
37 /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
38 echo >&3
39 exec 3>/dev/null
40 ;;
41 *)
42 /bin/echo "$LINE" >&3
43 ;;
44 esac
45 done < $f
46 echo >&3
47 exec 3>/dev/null
48done
49
50LASTFILE=""
51for f in $TMPDIR/*; do
52 if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then
53 LASTFILE=$(cat $TMPDIR/.$(basename $f) )
54 echo "[ $LASTFILE ]"
55 fi
56 cat $f
57done
58
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
deleted file mode 100644
index c095d79cae73..000000000000
--- a/Documentation/virtual/lguest/lguest.c
+++ /dev/null
@@ -1,2065 +0,0 @@
1/*P:100
2 * This is the Launcher code, a simple program which lays out the "physical"
3 * memory for the new Guest by mapping the kernel image and the virtual
4 * devices, then opens /dev/lguest to tell the kernel about the Guest and
5 * control it.
6:*/
7#define _LARGEFILE64_SOURCE
8#define _GNU_SOURCE
9#include <stdio.h>
10#include <string.h>
11#include <unistd.h>
12#include <err.h>
13#include <stdint.h>
14#include <stdlib.h>
15#include <elf.h>
16#include <sys/mman.h>
17#include <sys/param.h>
18#include <sys/types.h>
19#include <sys/stat.h>
20#include <sys/wait.h>
21#include <sys/eventfd.h>
22#include <fcntl.h>
23#include <stdbool.h>
24#include <errno.h>
25#include <ctype.h>
26#include <sys/socket.h>
27#include <sys/ioctl.h>
28#include <sys/time.h>
29#include <time.h>
30#include <netinet/in.h>
31#include <net/if.h>
32#include <linux/sockios.h>
33#include <linux/if_tun.h>
34#include <sys/uio.h>
35#include <termios.h>
36#include <getopt.h>
37#include <assert.h>
38#include <sched.h>
39#include <limits.h>
40#include <stddef.h>
41#include <signal.h>
42#include <pwd.h>
43#include <grp.h>
44
45#include <linux/virtio_config.h>
46#include <linux/virtio_net.h>
47#include <linux/virtio_blk.h>
48#include <linux/virtio_console.h>
49#include <linux/virtio_rng.h>
50#include <linux/virtio_ring.h>
51#include <asm/bootparam.h>
52#include "../../../include/linux/lguest_launcher.h"
53/*L:110
54 * We can ignore the 43 include files we need for this program, but I do want
55 * to draw attention to the use of kernel-style types.
56 *
57 * As Linus said, "C is a Spartan language, and so should your naming be." I
58 * like these abbreviations, so we define them here. Note that u64 is always
59 * unsigned long long, which works on all Linux systems: this means that we can
60 * use %llu in printf for any u64.
61 */
62typedef unsigned long long u64;
63typedef uint32_t u32;
64typedef uint16_t u16;
65typedef uint8_t u8;
66/*:*/
67
68#define BRIDGE_PFX "bridge:"
69#ifndef SIOCBRADDIF
70#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
71#endif
72/* We can have up to 256 pages for devices. */
73#define DEVICE_PAGES 256
74/* This will occupy 3 pages: it must be a power of 2. */
75#define VIRTQUEUE_NUM 256
76
77/*L:120
78 * verbose is both a global flag and a macro. The C preprocessor allows
79 * this, and although I wouldn't recommend it, it works quite nicely here.
80 */
81static bool verbose;
82#define verbose(args...) \
83 do { if (verbose) printf(args); } while(0)
84/*:*/
85
86/* The pointer to the start of guest memory. */
87static void *guest_base;
88/* The maximum guest physical address allowed, and maximum possible. */
89static unsigned long guest_limit, guest_max;
90/* The /dev/lguest file descriptor. */
91static int lguest_fd;
92
93/* a per-cpu variable indicating whose vcpu is currently running */
94static unsigned int __thread cpu_id;
95
96/* This is our list of devices. */
97struct device_list {
98 /* Counter to assign interrupt numbers. */
99 unsigned int next_irq;
100
101 /* Counter to print out convenient device numbers. */
102 unsigned int device_num;
103
104 /* The descriptor page for the devices. */
105 u8 *descpage;
106
107 /* A single linked list of devices. */
108 struct device *dev;
109 /* And a pointer to the last device for easy append. */
110 struct device *lastdev;
111};
112
113/* The list of Guest devices, based on command line arguments. */
114static struct device_list devices;
115
116/* The device structure describes a single device. */
117struct device {
118 /* The linked-list pointer. */
119 struct device *next;
120
121 /* The device's descriptor, as mapped into the Guest. */
122 struct lguest_device_desc *desc;
123
124 /* We can't trust desc values once Guest has booted: we use these. */
125 unsigned int feature_len;
126 unsigned int num_vq;
127
128 /* The name of this device, for --verbose. */
129 const char *name;
130
131 /* Any queues attached to this device */
132 struct virtqueue *vq;
133
134 /* Is it operational */
135 bool running;
136
137 /* Device-specific data. */
138 void *priv;
139};
140
141/* The virtqueue structure describes a queue attached to a device. */
142struct virtqueue {
143 struct virtqueue *next;
144
145 /* Which device owns me. */
146 struct device *dev;
147
148 /* The configuration for this queue. */
149 struct lguest_vqconfig config;
150
151 /* The actual ring of buffers. */
152 struct vring vring;
153
154 /* Last available index we saw. */
155 u16 last_avail_idx;
156
157 /* How many are used since we sent last irq? */
158 unsigned int pending_used;
159
160 /* Eventfd where Guest notifications arrive. */
161 int eventfd;
162
163 /* Function for the thread which is servicing this virtqueue. */
164 void (*service)(struct virtqueue *vq);
165 pid_t thread;
166};
167
168/* Remember the arguments to the program so we can "reboot" */
169static char **main_args;
170
171/* The original tty settings to restore on exit. */
172static struct termios orig_term;
173
174/*
175 * We have to be careful with barriers: our devices are all run in separate
176 * threads and so we need to make sure that changes visible to the Guest happen
177 * in precise order.
178 */
179#define wmb() __asm__ __volatile__("" : : : "memory")
180#define mb() __asm__ __volatile__("" : : : "memory")
181
182/*
183 * Convert an iovec element to the given type.
184 *
185 * This is a fairly ugly trick: we need to know the size of the type and
186 * alignment requirement to check the pointer is kosher. It's also nice to
187 * have the name of the type in case we report failure.
188 *
189 * Typing those three things all the time is cumbersome and error prone, so we
190 * have a macro which sets them all up and passes to the real function.
191 */
192#define convert(iov, type) \
193 ((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
194
195static void *_convert(struct iovec *iov, size_t size, size_t align,
196 const char *name)
197{
198 if (iov->iov_len != size)
199 errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
200 if ((unsigned long)iov->iov_base % align != 0)
201 errx(1, "Bad alignment %p for %s", iov->iov_base, name);
202 return iov->iov_base;
203}
204
205/* Wrapper for the last available index. Makes it easier to change. */
206#define lg_last_avail(vq) ((vq)->last_avail_idx)
207
208/*
209 * The virtio configuration space is defined to be little-endian. x86 is
210 * little-endian too, but it's nice to be explicit so we have these helpers.
211 */
212#define cpu_to_le16(v16) (v16)
213#define cpu_to_le32(v32) (v32)
214#define cpu_to_le64(v64) (v64)
215#define le16_to_cpu(v16) (v16)
216#define le32_to_cpu(v32) (v32)
217#define le64_to_cpu(v64) (v64)
218
219/* Is this iovec empty? */
static bool iov_empty(const struct iovec iov[], unsigned int num_iov)
{
	const struct iovec *cur = iov;
	const struct iovec *end = iov + num_iov;

	/* A single element with bytes left means there is still data. */
	while (cur != end) {
		if (cur->iov_len != 0)
			return false;
		cur++;
	}
	return true;
}
229
230/* Take len bytes from the front of this iovec. */
static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
{
	struct iovec *elem;

	for (elem = iov; elem < iov + num_iov; elem++) {
		/* Eat as much of this element as we still need, at most all. */
		unsigned int take = len;

		if (elem->iov_len < len)
			take = elem->iov_len;

		elem->iov_base += take;
		elem->iov_len -= take;
		len -= take;
	}

	/* The caller must never ask for more than the iovec holds. */
	assert(len == 0);
}
245
246/* The device virtqueue descriptors are followed by feature bitmasks. */
247static u8 *get_feature_bits(struct device *dev)
248{
249 return (u8 *)(dev->desc + 1)
250 + dev->num_vq * sizeof(struct lguest_vqconfig);
251}
252
253/*L:100
254 * The Launcher code itself takes us out into userspace, that scary place where
255 * pointers run wild and free! Unfortunately, like most userspace programs,
256 * it's quite boring (which is why everyone likes to hack on the kernel!).
257 * Perhaps if you make up an Lguest Drinking Game at this point, it will get
258 * you through this section. Or, maybe not.
259 *
260 * The Launcher sets up a big chunk of memory to be the Guest's "physical"
261 * memory and stores it in "guest_base". In other words, Guest physical ==
262 * Launcher virtual with an offset.
263 *
264 * This can be tough to get your head around, but usually it just means that we
265 * use these trivial conversion functions when the Guest gives us its
266 * "physical" addresses:
267 */
268static void *from_guest_phys(unsigned long addr)
269{
270 return guest_base + addr;
271}
272
273static unsigned long to_guest_phys(const void *addr)
274{
275 return (addr - guest_base);
276}
277
278/*L:130
279 * Loading the Kernel.
280 *
281 * We start with a couple of simple helper routines. open_or_die() avoids
282 * error-checking code cluttering the callers:
283 */
static int open_or_die(const char *name, int flags)
{
	int fd;

	fd = open(name, flags);
	/* err() reports the errno text and exits, so callers never see < 0. */
	if (fd < 0)
		err(1, "Failed to open %s", name);

	return fd;
}
291
292/* map_zeroed_pages() takes a number of pages. */
static void *map_zeroed_pages(unsigned int num)
{
	int zero_fd = open_or_die("/dev/zero", O_RDONLY);
	void *region, *usable;

	/*
	 * Reserve num pages plus one inaccessible page on either side.  The
	 * mapping is private, so writes get their own copy of the page.
	 */
	region = mmap(NULL, getpagesize() * (num+2),
		      PROT_NONE, MAP_PRIVATE, zero_fd, 0);
	if (region == MAP_FAILED)
		err(1, "Mmapping %u pages of /dev/zero", num);

	/* Open up everything except the first and last (guard) pages. */
	usable = region + getpagesize();
	if (mprotect(usable, getpagesize() * num,
		     PROT_READ|PROT_WRITE) == -1)
		err(1, "mprotect rw %u pages failed", num);

	/* The mapping outlives the descriptor, so we can drop it now. */
	close(zero_fd);

	/* Hand back the first accessible page, just past the leading guard. */
	return usable;
}
322
323/* Get some more pages for a device. */
324static void *get_pages(unsigned int num)
325{
326 void *addr = from_guest_phys(guest_limit);
327
328 guest_limit += num * getpagesize();
329 if (guest_limit > guest_max)
330 errx(1, "Not enough memory for devices");
331 return addr;
332}
333
334/*
335 * This routine is used to load the kernel or initrd. It tries mmap, but if
336 * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
337 * it falls back to reading the memory in.
338 */
static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
{
	void *mapped;
	ssize_t got;

	/*
	 * First choice: map the file straight over the target address.  The
	 * mapping is writable even for segments marked read-only, because the
	 * kernel patches its own instructions, and private so that pages are
	 * only copied once something writes to them.
	 */
	mapped = mmap(addr, len, PROT_READ|PROT_WRITE,
		      MAP_FIXED|MAP_PRIVATE, fd, offset);
	if (mapped != MAP_FAILED)
		return;

	/* Fallback: copy the bytes in; pread seeks and reads in one call. */
	got = pread(fd, addr, len, offset);
	if (got != len)
		err(1, "Reading offset %lu len %lu gave %zi", offset, len, got);
}
361
362/*
363 * This routine takes an open vmlinux image, which is in ELF, and maps it into
364 * the Guest memory. ELF = Executable and Linkable Format, which is the format used
365 * by all modern binaries on Linux including the kernel.
366 *
367 * The ELF headers give *two* addresses: a physical address, and a virtual
368 * address. We use the physical address; the Guest will map itself to the
369 * virtual address.
370 *
371 * We return the starting address.
372 */
/*
 * Read all the program headers of an already-validated ELF header and map
 * every loadable segment at its physical address in Guest memory.
 */
static void map_elf_segments(int elf_fd, const Elf32_Ehdr *ehdr)
{
	/*
	 * This array lives on the stack and is sized by the file, which is
	 * why the caller must have bounded e_phnum before we get here.
	 */
	Elf32_Phdr phdr[ehdr->e_phnum];
	unsigned int i;

	/* We read in all the program headers at once: */
	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
		err(1, "Seeking to program headers");
	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
		err(1, "Reading program headers");

	/*
	 * Try all the headers: there are usually only three.  A read-only one,
	 * a read-write one, and a "note" section which we don't load.
	 */
	for (i = 0; i < ehdr->e_phnum; i++) {
		/* If this isn't a loadable segment, we ignore it */
		if (phdr[i].p_type != PT_LOAD)
			continue;

		verbose("Section %i: size %i addr %p\n",
			i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);

		/* We map this section of the file at its physical address. */
		map_at(elf_fd, from_guest_phys(phdr[i].p_paddr),
		       phdr[i].p_offset, phdr[i].p_filesz);
	}
}

/*
 * Map an ELF kernel image into Guest memory.
 *
 * elf_fd is the open image and ehdr its already-read ELF header.  Exits the
 * Launcher if the header is malformed or the program headers cannot be read.
 * Returns the entry point recorded in the ELF header.
 */
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
{
	/*
	 * Sanity checks on the main ELF header: an x86 executable with a
	 * reasonable number of correctly-sized program headers.  This has to
	 * happen before anything is sized from e_phnum: the header comes
	 * straight from the file, and an unchecked count would give us either
	 * a zero-length or a very large on-stack array.
	 */
	if (ehdr->e_type != ET_EXEC
	    || ehdr->e_machine != EM_386
	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
		errx(1, "Malformed elf header");

	/*
	 * An ELF executable contains an ELF header and a number of "program"
	 * headers which indicate which parts ("segments") of the program to
	 * load where.
	 */
	map_elf_segments(elf_fd, ehdr);

	/* The entry point is given in the ELF header. */
	return ehdr->e_entry;
}
420
421/*L:150
422 * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed
423 * to jump into it and it will unpack itself. We used to have to perform some
424 * hairy magic because the unpacking code scared me.
425 *
426 * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
427 * a small patch to jump over the tricky bits in the Guest, so now we just read
428 * the funky header so we know where in the file to load, and away we go!
429 */
430static unsigned long load_bzimage(int fd)
431{
432 struct boot_params boot;
433 int r;
434 /* Modern bzImages get loaded at 1M. */
435 void *p = from_guest_phys(0x100000);
436
437 /*
438 * Go back to the start of the file and read the header. It should be
439 * a Linux boot header (see Documentation/x86/boot.txt)
440 */
441 lseek(fd, 0, SEEK_SET);
442 read(fd, &boot, sizeof(boot));
443
444 /* Inside the setup_hdr, we expect the magic "HdrS" */
445 if (memcmp(&boot.hdr.header, "HdrS", 4) != 0)
446 errx(1, "This doesn't look like a bzImage to me");
447
448 /* Skip over the extra sectors of the header. */
449 lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET);
450
451 /* Now read everything into memory. in nice big chunks. */
452 while ((r = read(fd, p, 65536)) > 0)
453 p += r;
454
455 /* Finally, code32_start tells us where to enter the kernel. */
456 return boot.hdr.code32_start;
457}
458
459/*L:140
460 * Loading the kernel is easy when it's a "vmlinux", but most kernels
461 * come wrapped up in the self-decompressing "bzImage" format. With a little
462 * work, we can load those, too.
463 */
static unsigned long load_kernel(int fd)
{
	Elf32_Ehdr hdr;
	bool is_elf;

	/* Pull in enough of the file to tell which format we were given. */
	if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
		err(1, "Reading kernel");

	/* ELF files announce themselves with the "\177ELF" magic. */
	is_elf = (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0);

	/* Anything that is not ELF is assumed to be a bzImage. */
	return is_elf ? map_elf(fd, &hdr) : load_bzimage(fd);
}
479
480/*
481 * This is a trivial little helper to align pages. Andi Kleen hated it because
482 * it calls getpagesize() twice: "it's dumb code."
483 *
484 * Kernel guys get really het up about optimization, even when it's not
485 * necessary. I leave this code as a reaction against that.
486 */
static inline unsigned long page_align(unsigned long addr)
{
	/* Bump up to just short of the next page boundary... */
	unsigned long bumped = addr + getpagesize() - 1;

	/* ...then mask off everything below a page. */
	return bumped & ~((unsigned long)getpagesize() - 1);
}
492
493/*L:180
494 * An "initial ram disk" is a disk image loaded into memory along with the
495 * kernel which the kernel can use to boot from without needing any drivers.
496 * Most distributions now use this as standard: the initrd contains the code to
497 * load the appropriate driver modules for the current machine.
498 *
499 * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
500 * kernels. He sent me this (and tells me when I break it).
501 */
static unsigned long load_initrd(const char *name, unsigned long mem)
{
	struct stat st;
	unsigned long aligned_len;
	int ifd = open_or_die(name, O_RDONLY);

	/* We need the file size, and fstat() is how we get it. */
	if (fstat(ifd, &st) < 0)
		err(1, "fstat() on initrd '%s'", name);

	/*
	 * The initrd goes at the very top of memory.  Its start is placed a
	 * whole number of pages below the top, because mmap wants page
	 * alignment; only the real file size is mapped there.
	 */
	aligned_len = page_align(st.st_size);
	map_at(ifd, from_guest_phys(mem - aligned_len), 0, st.st_size);

	/* The descriptor is not needed once the contents are in place. */
	close(ifd);
	verbose("mapped initrd %s size=%lu @ %p\n", name, aligned_len,
		(void *)mem - aligned_len);

	/* The caller gets the (rounded-up) initrd size. */
	return aligned_len;
}
529/*:*/
530
531/*
532 * Simple routine to roll all the commandline arguments together with spaces
533 * between them.
534 */
static void concat(char *dst, char *args[])
{
	char *out = dst;
	char **arg;

	for (arg = args; *arg; arg++) {
		size_t n = strlen(*arg);

		/* Every argument but the first is preceded by one space. */
		if (arg != args)
			*out++ = ' ';
		memcpy(out, *arg, n);
		out += n;
	}

	/* Terminate the result; this also covers an empty argument list. */
	*out = '\0';
}
550
551/*L:185
552 * This is where we actually tell the kernel to initialize the Guest. We
553 * saw the arguments it expects when we looked at initialize() in lguest_user.c:
554 * the base of Guest "physical" memory, the top physical page to allow and the
555 * entry point for the Guest.
556 */
557static void tell_kernel(unsigned long start)
558{
559 unsigned long args[] = { LHREQ_INITIALIZE,
560 (unsigned long)guest_base,
561 guest_limit / getpagesize(), start };
562 verbose("Guest: %p - %p (%#lx)\n",
563 guest_base, guest_base + guest_limit, guest_limit);
564 lguest_fd = open_or_die("/dev/lguest", O_RDWR);
565 if (write(lguest_fd, args, sizeof(args)) < 0)
566 err(1, "Writing to /dev/lguest");
567}
568/*:*/
569
570/*L:200
571 * Device Handling.
572 *
573 * When the Guest gives us a buffer, it sends an array of addresses and sizes.
574 * We need to make sure it's not trying to reach into the Launcher itself, so
575 * we have a convenient routine which checks it and exits with an error message
576 * if something funny is going on:
577 */
578static void *_check_pointer(unsigned long addr, unsigned int size,
579 unsigned int line)
580{
581 /*
582 * Check if the requested address and size exceeds the allocated memory,
583 * or addr + size wraps around.
584 */
585 if ((addr + size) > guest_limit || (addr + size) < addr)
586 errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
587 /*
588 * We return a pointer for the caller's convenience, now we know it's
589 * safe to use.
590 */
591 return from_guest_phys(addr);
592}
593/* A macro which transparently hands the line number to the real function. */
594#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
595
596/*
597 * Each buffer in the virtqueues is actually a chain of descriptors. This
598 * function returns the next descriptor in the chain, or vq->vring.num if we're
599 * at the end.
600 */
601static unsigned next_desc(struct vring_desc *desc,
602 unsigned int i, unsigned int max)
603{
604 unsigned int next;
605
606 /* If this descriptor says it doesn't chain, we're done. */
607 if (!(desc[i].flags & VRING_DESC_F_NEXT))
608 return max;
609
610 /* Check they're not leading us off end of descriptors. */
611 next = desc[i].next;
612 /* Make sure compiler knows to grab that: we don't want it changing! */
613 wmb();
614
615 if (next >= max)
616 errx(1, "Desc next is %u", next);
617
618 return next;
619}
620
621/*
622 * This actually sends the interrupt for this virtqueue, if we've used a
623 * buffer.
624 */
625static void trigger_irq(struct virtqueue *vq)
626{
627 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
628
629 /* Don't inform them if nothing used. */
630 if (!vq->pending_used)
631 return;
632 vq->pending_used = 0;
633
634 /* If they don't want an interrupt, don't send one... */
635 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
636 return;
637 }
638
639 /* Send the Guest an interrupt tell them we used something up. */
640 if (write(lguest_fd, buf, sizeof(buf)) != 0)
641 err(1, "Triggering irq %i", vq->config.irq);
642}
643
644/*
645 * This looks in the virtqueue for the first available buffer, and converts
646 * it to an iovec for convenient access. Since descriptors consist of some
647 * number of output then some number of input descriptors, it's actually two
648 * iovecs, but we pack them into one and note how many of each there were.
649 *
650 * This function waits if necessary, and returns the descriptor number found.
651 */
652static unsigned wait_for_vq_desc(struct virtqueue *vq,
653 struct iovec iov[],
654 unsigned int *out_num, unsigned int *in_num)
655{
656 unsigned int i, head, max;
657 struct vring_desc *desc;
658 u16 last_avail = lg_last_avail(vq);
659
660 /* There's nothing available? */
661 while (last_avail == vq->vring.avail->idx) {
662 u64 event;
663
664 /*
665 * Since we're about to sleep, now is a good time to tell the
666 * Guest about what we've used up to now.
667 */
668 trigger_irq(vq);
669
670 /* OK, now we need to know about added descriptors. */
671 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
672
673 /*
674 * They could have slipped one in as we were doing that: make
675 * sure it's written, then check again.
676 */
677 mb();
678 if (last_avail != vq->vring.avail->idx) {
679 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
680 break;
681 }
682
683 /* Nothing new? Wait for eventfd to tell us they refilled. */
684 if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
685 errx(1, "Event read failed?");
686
687 /* We don't need to be notified again. */
688 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
689 }
690
691 /* Check it isn't doing very strange things with descriptor numbers. */
692 if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
693 errx(1, "Guest moved used index from %u to %u",
694 last_avail, vq->vring.avail->idx);
695
696 /*
697 * Grab the next descriptor number they're advertising, and increment
698 * the index we've seen.
699 */
700 head = vq->vring.avail->ring[last_avail % vq->vring.num];
701 lg_last_avail(vq)++;
702
703 /* If their number is silly, that's a fatal mistake. */
704 if (head >= vq->vring.num)
705 errx(1, "Guest says index %u is available", head);
706
707 /* When we start there are none of either input nor output. */
708 *out_num = *in_num = 0;
709
710 max = vq->vring.num;
711 desc = vq->vring.desc;
712 i = head;
713
714 /*
715 * If this is an indirect entry, then this buffer contains a descriptor
716 * table which we handle as if it's any normal descriptor chain.
717 */
718 if (desc[i].flags & VRING_DESC_F_INDIRECT) {
719 if (desc[i].len % sizeof(struct vring_desc))
720 errx(1, "Invalid size for indirect buffer table");
721
722 max = desc[i].len / sizeof(struct vring_desc);
723 desc = check_pointer(desc[i].addr, desc[i].len);
724 i = 0;
725 }
726
727 do {
728 /* Grab the first descriptor, and check it's OK. */
729 iov[*out_num + *in_num].iov_len = desc[i].len;
730 iov[*out_num + *in_num].iov_base
731 = check_pointer(desc[i].addr, desc[i].len);
732 /* If this is an input descriptor, increment that count. */
733 if (desc[i].flags & VRING_DESC_F_WRITE)
734 (*in_num)++;
735 else {
736 /*
737 * If it's an output descriptor, they're all supposed
738 * to come before any input descriptors.
739 */
740 if (*in_num)
741 errx(1, "Descriptor has out after in");
742 (*out_num)++;
743 }
744
745 /* If we've got too many, that implies a descriptor loop. */
746 if (*out_num + *in_num > max)
747 errx(1, "Looped descriptor");
748 } while ((i = next_desc(desc, i, max)) != max);
749
750 return head;
751}
752
753/*
754 * After we've used one of their buffers, we tell the Guest about it. Sometime
755 * later we'll want to send them an interrupt using trigger_irq(); note that
756 * wait_for_vq_desc() does that for us if it has to wait.
757 */
758static void add_used(struct virtqueue *vq, unsigned int head, int len)
759{
760 struct vring_used_elem *used;
761
762 /*
763 * The virtqueue contains a ring of used buffers. Get a pointer to the
764 * next entry in that used ring.
765 */
766 used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
767 used->id = head;
768 used->len = len;
769 /* Make sure buffer is written before we update index. */
770 wmb();
771 vq->vring.used->idx++;
772 vq->pending_used++;
773}
774
775/* And here's the combo meal deal. Supersize me! */
static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
{
	/* Hand the buffer back first... */
	add_used(vq, head, len);

	/* ...then let the Guest know straight away. */
	trigger_irq(vq);
}
781
782/*
783 * The Console
784 *
785 * We associate some data with the console for our exit hack.
786 */
struct console_abort {
	/* Number of consecutive lone ^C characters seen so far. */
	int count;
	/* Time at which the first of those arrived. */
	struct timeval start;
};
793
794/* This is the routine which handles console input (ie. stdin). */
795static void console_input(struct virtqueue *vq)
796{
797 int len;
798 unsigned int head, in_num, out_num;
799 struct console_abort *abort = vq->dev->priv;
800 struct iovec iov[vq->vring.num];
801
802 /* Make sure there's a descriptor available. */
803 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
804 if (out_num)
805 errx(1, "Output buffers in console in queue?");
806
807 /* Read into it. This is where we usually wait. */
808 len = readv(STDIN_FILENO, iov, in_num);
809 if (len <= 0) {
810 /* Ran out of input? */
811 warnx("Failed to get console input, ignoring console.");
812 /*
813 * For simplicity, dying threads kill the whole Launcher. So
814 * just nap here.
815 */
816 for (;;)
817 pause();
818 }
819
820 /* Tell the Guest we used a buffer. */
821 add_used_and_trigger(vq, head, len);
822
823 /*
824 * Three ^C within one second? Exit.
825 *
826 * This is such a hack, but works surprisingly well. Each ^C has to
827 * be in a buffer by itself, so they can't be too fast. But we check
828 * that we get three within about a second, so they can't be too
829 * slow.
830 */
831 if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
832 abort->count = 0;
833 return;
834 }
835
836 abort->count++;
837 if (abort->count == 1)
838 gettimeofday(&abort->start, NULL);
839 else if (abort->count == 3) {
840 struct timeval now;
841 gettimeofday(&now, NULL);
842 /* Kill all Launcher processes with SIGINT, like normal ^C */
843 if (now.tv_sec <= abort->start.tv_sec+1)
844 kill(0, SIGINT);
845 abort->count = 0;
846 }
847}
848
849/* This is the routine which handles console output (ie. stdout). */
850static void console_output(struct virtqueue *vq)
851{
852 unsigned int head, out, in;
853 struct iovec iov[vq->vring.num];
854
855 /* We usually wait in here, for the Guest to give us something. */
856 head = wait_for_vq_desc(vq, iov, &out, &in);
857 if (in)
858 errx(1, "Input buffers in console output queue?");
859
860 /* writev can return a partial write, so we loop here. */
861 while (!iov_empty(iov, out)) {
862 int len = writev(STDOUT_FILENO, iov, out);
863 if (len <= 0) {
864 warn("Write to stdout gave %i (%d)", len, errno);
865 break;
866 }
867 iov_consume(iov, out, len);
868 }
869
870 /*
871 * We're finished with that buffer: if we're going to sleep,
872 * wait_for_vq_desc() will prod the Guest with an interrupt.
873 */
874 add_used(vq, head, 0);
875}
876
877/*
878 * The Network
879 *
880 * Handling output for network is also simple: we get all the output buffers
881 * and write them to /dev/net/tun.
882 */
struct net_info {
	/* Descriptor that packets are read from and written to. */
	int tunfd;
};
886
887static void net_output(struct virtqueue *vq)
888{
889 struct net_info *net_info = vq->dev->priv;
890 unsigned int head, out, in;
891 struct iovec iov[vq->vring.num];
892
893 /* We usually wait in here for the Guest to give us a packet. */
894 head = wait_for_vq_desc(vq, iov, &out, &in);
895 if (in)
896 errx(1, "Input buffers in net output queue?");
897 /*
898 * Send the whole thing through to /dev/net/tun. It expects the exact
899 * same format: what a coincidence!
900 */
901 if (writev(net_info->tunfd, iov, out) < 0)
902 warnx("Write to tun failed (%d)?", errno);
903
904 /*
905 * Done with that one; wait_for_vq_desc() will send the interrupt if
906 * all packets are processed.
907 */
908 add_used(vq, head, 0);
909}
910
911/*
912 * Handling network input is a bit trickier, because I've tried to optimize it.
913 *
914 * First we have a helper routine which tells us if reading from this file descriptor
915 * (ie. the /dev/net/tun device) will block:
916 */
static bool will_block(int fd)
{
	struct timeval no_wait = { 0, 0 };
	fd_set readable;
	int ready;

	/* Poll just this one descriptor for readability, without waiting. */
	FD_ZERO(&readable);
	FD_SET(fd, &readable);
	ready = select(fd+1, &readable, NULL, NULL, &no_wait);

	/* Anything other than "exactly one ready" counts as blocking. */
	return ready != 1;
}
925
926/*
927 * This handles packets coming in from the tun device to our Guest. Like all
928 * service routines, it gets called again as soon as it returns, so you don't
929 * see a while(1) loop here.
930 */
931static void net_input(struct virtqueue *vq)
932{
933 int len;
934 unsigned int head, out, in;
935 struct iovec iov[vq->vring.num];
936 struct net_info *net_info = vq->dev->priv;
937
938 /*
939 * Get a descriptor to write an incoming packet into. This will also
940 * send an interrupt if they're out of descriptors.
941 */
942 head = wait_for_vq_desc(vq, iov, &out, &in);
943 if (out)
944 errx(1, "Output buffers in net input queue?");
945
946 /*
947 * If it looks like we'll block reading from the tun device, send them
948 * an interrupt.
949 */
950 if (vq->pending_used && will_block(net_info->tunfd))
951 trigger_irq(vq);
952
953 /*
954 * Read in the packet. This is where we normally wait (when there's no
955 * incoming network traffic).
956 */
957 len = readv(net_info->tunfd, iov, in);
958 if (len <= 0)
959 warn("Failed to read from tun (%d).", errno);
960
961 /*
962 * Mark that packet buffer as used, but don't interrupt here. We want
963 * to wait until we've done as much work as we can.
964 */
965 add_used(vq, head, len);
966}
967/*:*/
968
969/* This is the helper to create threads: run the service routine in a loop. */
970static int do_thread(void *_vq)
971{
972 struct virtqueue *vq = _vq;
973
974 for (;;)
975 vq->service(vq);
976 return 0;
977}
978
979/*
980 * When a child dies, we kill our entire process group with SIGTERM. This
981 * also has the side effect that the shell restores the console for us!
982 */
static void kill_launcher(int signal)
{
	/* Which signal brought us here makes no difference. */
	(void)signal;

	/* Process ID 0 addresses every process in our process group. */
	kill(0, SIGTERM);
}
987
988static void reset_device(struct device *dev)
989{
990 struct virtqueue *vq;
991
992 verbose("Resetting device %s\n", dev->name);
993
994 /* Clear any features they've acked. */
995 memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
996
997 /* We're going to be explicitly killing threads, so ignore them. */
998 signal(SIGCHLD, SIG_IGN);
999
1000 /* Zero out the virtqueues, get rid of their threads */
1001 for (vq = dev->vq; vq; vq = vq->next) {
1002 if (vq->thread != (pid_t)-1) {
1003 kill(vq->thread, SIGTERM);
1004 waitpid(vq->thread, NULL, 0);
1005 vq->thread = (pid_t)-1;
1006 }
1007 memset(vq->vring.desc, 0,
1008 vring_size(vq->config.num, LGUEST_VRING_ALIGN));
1009 lg_last_avail(vq) = 0;
1010 }
1011 dev->running = false;
1012
1013 /* Now we care if threads die. */
1014 signal(SIGCHLD, (void *)kill_launcher);
1015}
1016
1017/*L:216
1018 * This actually creates the thread which services the virtqueue for a device.
1019 */
1020static void create_thread(struct virtqueue *vq)
1021{
1022 /*
1023 * Create stack for thread. Since the stack grows upwards, we point
1024 * the stack pointer to the end of this region.
1025 */
1026 char *stack = malloc(32768);
1027 unsigned long args[] = { LHREQ_EVENTFD,
1028 vq->config.pfn*getpagesize(), 0 };
1029
1030 /* Create a zero-initialized eventfd. */
1031 vq->eventfd = eventfd(0, 0);
1032 if (vq->eventfd < 0)
1033 err(1, "Creating eventfd");
1034 args[2] = vq->eventfd;
1035
1036 /*
1037 * Attach an eventfd to this virtqueue: it will go off when the Guest
1038 * does an LHCALL_NOTIFY for this vq.
1039 */
1040 if (write(lguest_fd, &args, sizeof(args)) != 0)
1041 err(1, "Attaching eventfd");
1042
1043 /*
1044 * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so
1045 * we get a signal if it dies.
1046 */
1047 vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
1048 if (vq->thread == (pid_t)-1)
1049 err(1, "Creating clone");
1050
1051 /* We close our local copy now the child has it. */
1052 close(vq->eventfd);
1053}
1054
1055static void start_device(struct device *dev)
1056{
1057 unsigned int i;
1058 struct virtqueue *vq;
1059
1060 verbose("Device %s OK: offered", dev->name);
1061 for (i = 0; i < dev->feature_len; i++)
1062 verbose(" %02x", get_feature_bits(dev)[i]);
1063 verbose(", accepted");
1064 for (i = 0; i < dev->feature_len; i++)
1065 verbose(" %02x", get_feature_bits(dev)
1066 [dev->feature_len+i]);
1067
1068 for (vq = dev->vq; vq; vq = vq->next) {
1069 if (vq->service)
1070 create_thread(vq);
1071 }
1072 dev->running = true;
1073}
1074
1075static void cleanup_devices(void)
1076{
1077 struct device *dev;
1078
1079 for (dev = devices.dev; dev; dev = dev->next)
1080 reset_device(dev);
1081
1082 /* If we saved off the original terminal settings, restore them now. */
1083 if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
1084 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
1085}
1086
1087/* When the Guest tells us they updated the status field, we handle it. */
1088static void update_device_status(struct device *dev)
1089{
1090 /* A zero status is a reset, otherwise it's a set of flags. */
1091 if (dev->desc->status == 0)
1092 reset_device(dev);
1093 else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
1094 warnx("Device %s configuration FAILED", dev->name);
1095 if (dev->running)
1096 reset_device(dev);
1097 } else {
1098 if (dev->running)
1099 err(1, "Device %s features finalized twice", dev->name);
1100 start_device(dev);
1101 }
1102}
1103
1104/*L:215
1105 * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In
1106 * particular, it's used to notify us of device status changes during boot.
1107 */
1108static void handle_output(unsigned long addr)
1109{
1110 struct device *i;
1111
1112 /* Check each device. */
1113 for (i = devices.dev; i; i = i->next) {
1114 struct virtqueue *vq;
1115
1116 /*
1117 * Notifications to device descriptors mean they updated the
1118 * device status.
1119 */
1120 if (from_guest_phys(addr) == i->desc) {
1121 update_device_status(i);
1122 return;
1123 }
1124
1125 /* Devices should not be used before features are finalized. */
1126 for (vq = i->vq; vq; vq = vq->next) {
1127 if (addr != vq->config.pfn*getpagesize())
1128 continue;
1129 errx(1, "Notification on %s before setup!", i->name);
1130 }
1131 }
1132
1133 /*
1134 * Early console write is done using notify on a nul-terminated string
1135 * in Guest memory. It's also great for hacking debugging messages
1136 * into a Guest.
1137 */
1138 if (addr >= guest_limit)
1139 errx(1, "Bad NOTIFY %#lx", addr);
1140
1141 write(STDOUT_FILENO, from_guest_phys(addr),
1142 strnlen(from_guest_phys(addr), guest_limit - addr));
1143}
1144
1145/*L:190
1146 * Device Setup
1147 *
1148 * All devices need a descriptor so the Guest knows it exists, and a "struct
1149 * device" so the Launcher can keep track of it. We have common helper
1150 * routines to allocate and manage them.
1151 */
1152
1153/*
1154 * The layout of the device page is a "struct lguest_device_desc" followed by a
1155 * number of virtqueue descriptors, then two sets of feature bits, then an
1156 * array of configuration bytes. This routine returns the configuration
1157 * pointer.
1158 */
1159static u8 *device_config(const struct device *dev)
1160{
1161 return (void *)(dev->desc + 1)
1162 + dev->num_vq * sizeof(struct lguest_vqconfig)
1163 + dev->feature_len * 2;
1164}
1165
1166/*
1167 * This routine allocates a new "struct lguest_device_desc" from descriptor
1168 * table page just above the Guest's normal memory. It returns a pointer to
1169 * that descriptor.
1170 */
1171static struct lguest_device_desc *new_dev_desc(u16 type)
1172{
1173 struct lguest_device_desc d = { .type = type };
1174 void *p;
1175
1176 /* Figure out where the next device config is, based on the last one. */
1177 if (devices.lastdev)
1178 p = device_config(devices.lastdev)
1179 + devices.lastdev->desc->config_len;
1180 else
1181 p = devices.descpage;
1182
1183 /* We only have one page for all the descriptors. */
1184 if (p + sizeof(d) > (void *)devices.descpage + getpagesize())
1185 errx(1, "Too many devices");
1186
1187 /* p might not be aligned, so we memcpy in. */
1188 return memcpy(p, &d, sizeof(d));
1189}
1190
1191/*
1192 * Each device descriptor is followed by the description of its virtqueues. We
1193 * specify how many descriptors the virtqueue is to have.
1194 */
1195static void add_virtqueue(struct device *dev, unsigned int num_descs,
1196 void (*service)(struct virtqueue *))
1197{
1198 unsigned int pages;
1199 struct virtqueue **i, *vq = malloc(sizeof(*vq));
1200 void *p;
1201
1202 /* First we need some memory for this virtqueue. */
1203 pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
1204 / getpagesize();
1205 p = get_pages(pages);
1206
1207 /* Initialize the virtqueue */
1208 vq->next = NULL;
1209 vq->last_avail_idx = 0;
1210 vq->dev = dev;
1211
1212 /*
1213 * This is the routine the service thread will run, and its Process ID
1214 * once it's running.
1215 */
1216 vq->service = service;
1217 vq->thread = (pid_t)-1;
1218
1219 /* Initialize the configuration. */
1220 vq->config.num = num_descs;
1221 vq->config.irq = devices.next_irq++;
1222 vq->config.pfn = to_guest_phys(p) / getpagesize();
1223
1224 /* Initialize the vring. */
1225 vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
1226
1227 /*
1228 * Append virtqueue to this device's descriptor. We use
1229 * device_config() to get the end of the device's current virtqueues;
1230 * we check that we haven't added any config or feature information
1231 * yet, otherwise we'd be overwriting them.
1232 */
1233 assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
1234 memcpy(device_config(dev), &vq->config, sizeof(vq->config));
1235 dev->num_vq++;
1236 dev->desc->num_vq++;
1237
1238 verbose("Virtqueue page %#lx\n", to_guest_phys(p));
1239
1240 /*
1241 * Add to tail of list, so dev->vq is first vq, dev->vq->next is
1242 * second.
1243 */
1244 for (i = &dev->vq; *i; i = &(*i)->next);
1245 *i = vq;
1246}
1247
1248/*
1249 * The first half of the feature bitmask is for us to advertise features. The
1250 * second half is for the Guest to accept features.
1251 */
1252static void add_feature(struct device *dev, unsigned bit)
1253{
1254 u8 *features = get_feature_bits(dev);
1255
1256 /* We can't extend the feature bits once we've added config bytes */
1257 if (dev->desc->feature_len <= bit / CHAR_BIT) {
1258 assert(dev->desc->config_len == 0);
1259 dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
1260 }
1261
1262 features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
1263}
1264
1265/*
1266 * This routine sets the configuration fields for an existing device's
1267 * descriptor. It only works for the last device, but that's OK because that's
1268 * how we use it.
1269 */
1270static void set_config(struct device *dev, unsigned len, const void *conf)
1271{
1272 /* Check we haven't overflowed our single page. */
1273 if (device_config(dev) + len > devices.descpage + getpagesize())
1274 errx(1, "Too many devices");
1275
1276 /* Copy in the config information, and store the length. */
1277 memcpy(device_config(dev), conf, len);
1278 dev->desc->config_len = len;
1279
1280 /* Size must fit in config_len field (8 bits)! */
1281 assert(dev->desc->config_len == len);
1282}
1283
1284/*
1285 * This routine does all the creation and setup of a new device, including
1286 * calling new_dev_desc() to allocate the descriptor and device memory. We
1287 * don't actually start the service threads until later.
1288 *
1289 * See what I mean about userspace being boring?
1290 */
1291static struct device *new_device(const char *name, u16 type)
1292{
1293 struct device *dev = malloc(sizeof(*dev));
1294
1295 /* Now we populate the fields one at a time. */
1296 dev->desc = new_dev_desc(type);
1297 dev->name = name;
1298 dev->vq = NULL;
1299 dev->feature_len = 0;
1300 dev->num_vq = 0;
1301 dev->running = false;
1302
1303 /*
1304 * Append to device list. Prepending to a single-linked list is
1305 * easier, but the user expects the devices to be arranged on the bus
1306 * in command-line order. The first network device on the command line
1307 * is eth0, the first block device /dev/vda, etc.
1308 */
1309 if (devices.lastdev)
1310 devices.lastdev->next = dev;
1311 else
1312 devices.dev = dev;
1313 devices.lastdev = dev;
1314
1315 return dev;
1316}
1317
1318/*
1319 * Our first setup routine is the console. It's a fairly simple device, but
1320 * UNIX tty handling makes it uglier than it could be.
1321 */
1322static void setup_console(void)
1323{
1324 struct device *dev;
1325
1326 /* If we can save the initial standard input settings... */
1327 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
1328 struct termios term = orig_term;
1329 /*
1330 * Then we turn off echo, line buffering and ^C etc: We want a
1331 * raw input stream to the Guest.
1332 */
1333 term.c_lflag &= ~(ISIG|ICANON|ECHO);
1334 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1335 }
1336
1337 dev = new_device("console", VIRTIO_ID_CONSOLE);
1338
1339 /* We store the console state in dev->priv, and initialize it. */
1340 dev->priv = malloc(sizeof(struct console_abort));
1341 ((struct console_abort *)dev->priv)->count = 0;
1342
1343 /*
1344 * The console needs two virtqueues: the input then the output. When
1345 * they put something the input queue, we make sure we're listening to
1346 * stdin. When they put something in the output queue, we write it to
1347 * stdout.
1348 */
1349 add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
1350 add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
1351
1352 verbose("device %u: console\n", ++devices.device_num);
1353}
1354/*:*/
1355
1356/*M:010
1357 * Inter-guest networking is an interesting area. Simplest is to have a
1358 * --sharenet=<name> option which opens or creates a named pipe. This can be
1359 * used to send packets to another guest in a 1:1 manner.
1360 *
1361 * More sophisticated is to use one of the tools developed for projects like UML
1362 * to do networking.
1363 *
1364 * Faster is to do virtio bonding in kernel. Doing this 1:1 would be
1365 * completely generic ("here's my vring, attach to your vring") and would work
1366 * for any traffic. Of course, namespace and permissions issues need to be
1367 * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide
1368 * multiple inter-guest channels behind one interface, although it would
1369 * require some manner of hotplugging new virtio channels.
1370 *
1371 * Finally, we could use a virtio network switch in the kernel, ie. vhost.
1372:*/
1373
1374static u32 str2ip(const char *ipaddr)
1375{
1376 unsigned int b[4];
1377
1378 if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4)
1379 errx(1, "Failed to parse IP address '%s'", ipaddr);
1380 return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
1381}
1382
/*
 * Parse a colon-separated MAC address of six hex fields (at most two digits
 * each) into @mac.  Exits via errx() if six fields cannot be read.
 */
static void str2mac(const char *macaddr, unsigned char mac[6])
{
	unsigned int field[6];
	int idx;

	/* sscanf needs unsigned int targets; narrow to bytes afterwards. */
	if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x",
		   field, field + 1, field + 2,
		   field + 3, field + 4, field + 5) != 6)
		errx(1, "Failed to parse mac address '%s'", macaddr);

	for (idx = 0; idx < 6; idx++)
		mac[idx] = field[idx];
}
1396
1397/*
1398 * This code is "adapted" from libbridge: it attaches the Host end of the
1399 * network device to the bridge device specified by the command line.
1400 *
1401 * This is yet another James Morris contribution (I'm an IP-level guy, so I
1402 * dislike bridging), and I just try not to break it.
1403 */
1404static void add_to_bridge(int fd, const char *if_name, const char *br_name)
1405{
1406 int ifidx;
1407 struct ifreq ifr;
1408
1409 if (!*br_name)
1410 errx(1, "must specify bridge name");
1411
1412 ifidx = if_nametoindex(if_name);
1413 if (!ifidx)
1414 errx(1, "interface %s does not exist!", if_name);
1415
1416 strncpy(ifr.ifr_name, br_name, IFNAMSIZ);
1417 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1418 ifr.ifr_ifindex = ifidx;
1419 if (ioctl(fd, SIOCBRADDIF, &ifr) < 0)
1420 err(1, "can't add %s to bridge %s", if_name, br_name);
1421}
1422
1423/*
1424 * This sets up the Host end of the network device with an IP address, brings
1425 * it up so packets will flow, the copies the MAC address into the hwaddr
1426 * pointer.
1427 */
1428static void configure_device(int fd, const char *tapif, u32 ipaddr)
1429{
1430 struct ifreq ifr;
1431 struct sockaddr_in sin;
1432
1433 memset(&ifr, 0, sizeof(ifr));
1434 strcpy(ifr.ifr_name, tapif);
1435
1436 /* Don't read these incantations. Just cut & paste them like I did! */
1437 sin.sin_family = AF_INET;
1438 sin.sin_addr.s_addr = htonl(ipaddr);
1439 memcpy(&ifr.ifr_addr, &sin, sizeof(sin));
1440 if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
1441 err(1, "Setting %s interface address", tapif);
1442 ifr.ifr_flags = IFF_UP;
1443 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
1444 err(1, "Bringing interface %s up", tapif);
1445}
1446
1447static int get_tun_device(char tapif[IFNAMSIZ])
1448{
1449 struct ifreq ifr;
1450 int netfd;
1451
1452 /* Start with this zeroed. Messy but sure. */
1453 memset(&ifr, 0, sizeof(ifr));
1454
1455 /*
1456 * We open the /dev/net/tun device and tell it we want a tap device. A
1457 * tap device is like a tun device, only somehow different. To tell
1458 * the truth, I completely blundered my way through this code, but it
1459 * works now!
1460 */
1461 netfd = open_or_die("/dev/net/tun", O_RDWR);
1462 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
1463 strcpy(ifr.ifr_name, "tap%d");
1464 if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
1465 err(1, "configuring /dev/net/tun");
1466
1467 if (ioctl(netfd, TUNSETOFFLOAD,
1468 TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0)
1469 err(1, "Could not set features for tun device");
1470
1471 /*
1472 * We don't need checksums calculated for packets coming in this
1473 * device: trust us!
1474 */
1475 ioctl(netfd, TUNSETNOCSUM, 1);
1476
1477 memcpy(tapif, ifr.ifr_name, IFNAMSIZ);
1478 return netfd;
1479}
1480
1481/*L:195
1482 * Our network is a Host<->Guest network. This can either use bridging or
1483 * routing, but the principle is the same: it uses the "tun" device to inject
1484 * packets into the Host as if they came in from a normal network card. We
1485 * just shunt packets between the Guest and the tun device.
1486 */
1487static void setup_tun_net(char *arg)
1488{
1489 struct device *dev;
1490 struct net_info *net_info = malloc(sizeof(*net_info));
1491 int ipfd;
1492 u32 ip = INADDR_ANY;
1493 bool bridging = false;
1494 char tapif[IFNAMSIZ], *p;
1495 struct virtio_net_config conf;
1496
1497 net_info->tunfd = get_tun_device(tapif);
1498
1499 /* First we create a new network device. */
1500 dev = new_device("net", VIRTIO_ID_NET);
1501 dev->priv = net_info;
1502
1503 /* Network devices need a recv and a send queue, just like console. */
1504 add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
1505 add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
1506
1507 /*
1508 * We need a socket to perform the magic network ioctls to bring up the
1509 * tap interface, connect to the bridge etc. Any socket will do!
1510 */
1511 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
1512 if (ipfd < 0)
1513 err(1, "opening IP socket");
1514
1515 /* If the command line was --tunnet=bridge:<name> do bridging. */
1516 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
1517 arg += strlen(BRIDGE_PFX);
1518 bridging = true;
1519 }
1520
1521 /* A mac address may follow the bridge name or IP address */
1522 p = strchr(arg, ':');
1523 if (p) {
1524 str2mac(p+1, conf.mac);
1525 add_feature(dev, VIRTIO_NET_F_MAC);
1526 *p = '\0';
1527 }
1528
1529 /* arg is now either an IP address or a bridge name */
1530 if (bridging)
1531 add_to_bridge(ipfd, tapif, arg);
1532 else
1533 ip = str2ip(arg);
1534
1535 /* Set up the tun device. */
1536 configure_device(ipfd, tapif, ip);
1537
1538 /* Expect Guest to handle everything except UFO */
1539 add_feature(dev, VIRTIO_NET_F_CSUM);
1540 add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
1541 add_feature(dev, VIRTIO_NET_F_GUEST_TSO4);
1542 add_feature(dev, VIRTIO_NET_F_GUEST_TSO6);
1543 add_feature(dev, VIRTIO_NET_F_GUEST_ECN);
1544 add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
1545 add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
1546 add_feature(dev, VIRTIO_NET_F_HOST_ECN);
1547 /* We handle indirect ring entries */
1548 add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
1549 set_config(dev, sizeof(conf), &conf);
1550
1551 /* We don't need the socket any more; setup is done. */
1552 close(ipfd);
1553
1554 devices.device_num++;
1555
1556 if (bridging)
1557 verbose("device %u: tun %s attached to bridge: %s\n",
1558 devices.device_num, tapif, arg);
1559 else
1560 verbose("device %u: tun %s: %s\n",
1561 devices.device_num, tapif, arg);
1562}
1563/*:*/
1564
1565/* This hangs off device->priv. */
1566struct vblk_info {
1567 /* The size of the file. */
1568 off64_t len;
1569
1570 /* The file descriptor for the file. */
1571 int fd;
1572
1573};
1574
1575/*L:210
1576 * The Disk
1577 *
1578 * The disk only has one virtqueue, so it only has one thread. It is really
1579 * simple: the Guest asks for a block number and we read or write that position
1580 * in the file.
1581 *
1582 * Before we serviced each virtqueue in a separate thread, that was unacceptably
1583 * slow: the Guest waits until the read is finished before running anything
1584 * else, even if it could have been doing useful work.
1585 *
1586 * We could have used async I/O, except it's reputed to suck so hard that
1587 * characters actually go missing from your code when you try to use it.
1588 */
1589static void blk_request(struct virtqueue *vq)
1590{
1591 struct vblk_info *vblk = vq->dev->priv;
1592 unsigned int head, out_num, in_num, wlen;
1593 int ret;
1594 u8 *in;
1595 struct virtio_blk_outhdr *out;
1596 struct iovec iov[vq->vring.num];
1597 off64_t off;
1598
1599 /*
1600 * Get the next request, where we normally wait. It triggers the
1601 * interrupt to acknowledge previously serviced requests (if any).
1602 */
1603 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1604
1605 /*
1606 * Every block request should contain at least one output buffer
1607 * (detailing the location on disk and the type of request) and one
1608 * input buffer (to hold the result).
1609 */
1610 if (out_num == 0 || in_num == 0)
1611 errx(1, "Bad virtblk cmd %u out=%u in=%u",
1612 head, out_num, in_num);
1613
1614 out = convert(&iov[0], struct virtio_blk_outhdr);
1615 in = convert(&iov[out_num+in_num-1], u8);
1616 /*
1617 * For historical reasons, block operations are expressed in 512 byte
1618 * "sectors".
1619 */
1620 off = out->sector * 512;
1621
1622 /*
1623 * In general the virtio block driver is allowed to try SCSI commands.
1624 * It'd be nice if we supported eject, for example, but we don't.
1625 */
1626 if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
1627 fprintf(stderr, "Scsi commands unsupported\n");
1628 *in = VIRTIO_BLK_S_UNSUPP;
1629 wlen = sizeof(*in);
1630 } else if (out->type & VIRTIO_BLK_T_OUT) {
1631 /*
1632 * Write
1633 *
1634 * Move to the right location in the block file. This can fail
1635 * if they try to write past end.
1636 */
1637 if (lseek64(vblk->fd, off, SEEK_SET) != off)
1638 err(1, "Bad seek to sector %llu", out->sector);
1639
1640 ret = writev(vblk->fd, iov+1, out_num-1);
1641 verbose("WRITE to sector %llu: %i\n", out->sector, ret);
1642
1643 /*
1644 * Grr... Now we know how long the descriptor they sent was, we
1645 * make sure they didn't try to write over the end of the block
1646 * file (possibly extending it).
1647 */
1648 if (ret > 0 && off + ret > vblk->len) {
1649 /* Trim it back to the correct length */
1650 ftruncate64(vblk->fd, vblk->len);
1651 /* Die, bad Guest, die. */
1652 errx(1, "Write past end %llu+%u", off, ret);
1653 }
1654
1655 wlen = sizeof(*in);
1656 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1657 } else if (out->type & VIRTIO_BLK_T_FLUSH) {
1658 /* Flush */
1659 ret = fdatasync(vblk->fd);
1660 verbose("FLUSH fdatasync: %i\n", ret);
1661 wlen = sizeof(*in);
1662 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1663 } else {
1664 /*
1665 * Read
1666 *
1667 * Move to the right location in the block file. This can fail
1668 * if they try to read past end.
1669 */
1670 if (lseek64(vblk->fd, off, SEEK_SET) != off)
1671 err(1, "Bad seek to sector %llu", out->sector);
1672
1673 ret = readv(vblk->fd, iov+1, in_num-1);
1674 verbose("READ from sector %llu: %i\n", out->sector, ret);
1675 if (ret >= 0) {
1676 wlen = sizeof(*in) + ret;
1677 *in = VIRTIO_BLK_S_OK;
1678 } else {
1679 wlen = sizeof(*in);
1680 *in = VIRTIO_BLK_S_IOERR;
1681 }
1682 }
1683
1684 /* Finished that request. */
1685 add_used(vq, head, wlen);
1686}
1687
1688/*L:198 This actually sets up a virtual block device. */
1689static void setup_block_file(const char *filename)
1690{
1691 struct device *dev;
1692 struct vblk_info *vblk;
1693 struct virtio_blk_config conf;
1694
1695 /* Creat the device. */
1696 dev = new_device("block", VIRTIO_ID_BLOCK);
1697
1698 /* The device has one virtqueue, where the Guest places requests. */
1699 add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
1700
1701 /* Allocate the room for our own bookkeeping */
1702 vblk = dev->priv = malloc(sizeof(*vblk));
1703
1704 /* First we open the file and store the length. */
1705 vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
1706 vblk->len = lseek64(vblk->fd, 0, SEEK_END);
1707
1708 /* We support FLUSH. */
1709 add_feature(dev, VIRTIO_BLK_F_FLUSH);
1710
1711 /* Tell Guest how many sectors this device has. */
1712 conf.capacity = cpu_to_le64(vblk->len / 512);
1713
1714 /*
1715 * Tell Guest not to put in too many descriptors at once: two are used
1716 * for the in and out elements.
1717 */
1718 add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
1719 conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2);
1720
1721 /* Don't try to put whole struct: we have 8 bit limit. */
1722 set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf);
1723
1724 verbose("device %u: virtblock %llu sectors\n",
1725 ++devices.device_num, le64_to_cpu(conf.capacity));
1726}
1727
1728/*L:211
1729 * Our random number generator device reads from /dev/random into the Guest's
1730 * input buffers. The usual case is that the Guest doesn't want random numbers
1731 * and so has no buffers although /dev/random is still readable, whereas
1732 * console is the reverse.
1733 *
1734 * The same logic applies, however.
1735 */
/* Per-device state for the random number device; hangs off device->priv. */
struct rng_info {
	/* File descriptor the random bytes are read from. */
	int rfd;
};
1739
1740static void rng_input(struct virtqueue *vq)
1741{
1742 int len;
1743 unsigned int head, in_num, out_num, totlen = 0;
1744 struct rng_info *rng_info = vq->dev->priv;
1745 struct iovec iov[vq->vring.num];
1746
1747 /* First we need a buffer from the Guests's virtqueue. */
1748 head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
1749 if (out_num)
1750 errx(1, "Output buffers in rng?");
1751
1752 /*
1753 * Just like the console write, we loop to cover the whole iovec.
1754 * In this case, short reads actually happen quite a bit.
1755 */
1756 while (!iov_empty(iov, in_num)) {
1757 len = readv(rng_info->rfd, iov, in_num);
1758 if (len <= 0)
1759 err(1, "Read from /dev/random gave %i", len);
1760 iov_consume(iov, in_num, len);
1761 totlen += len;
1762 }
1763
1764 /* Tell the Guest about the new input. */
1765 add_used(vq, head, totlen);
1766}
1767
1768/*L:199
1769 * This creates a "hardware" random number device for the Guest.
1770 */
1771static void setup_rng(void)
1772{
1773 struct device *dev;
1774 struct rng_info *rng_info = malloc(sizeof(*rng_info));
1775
1776 /* Our device's privat info simply contains the /dev/random fd. */
1777 rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
1778
1779 /* Create the new device. */
1780 dev = new_device("rng", VIRTIO_ID_RNG);
1781 dev->priv = rng_info;
1782
1783 /* The device has one virtqueue, where the Guest places inbufs. */
1784 add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
1785
1786 verbose("device %u: rng\n", devices.device_num++);
1787}
1788/* That's the end of device setup. */
1789
1790/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
1791static void __attribute__((noreturn)) restart_guest(void)
1792{
1793 unsigned int i;
1794
1795 /*
1796 * Since we don't track all open fds, we simply close everything beyond
1797 * stderr.
1798 */
1799 for (i = 3; i < FD_SETSIZE; i++)
1800 close(i);
1801
1802 /* Reset all the devices (kills all threads). */
1803 cleanup_devices();
1804
1805 execv(main_args[0], main_args);
1806 err(1, "Could not exec %s", main_args[0]);
1807}
1808
1809/*L:220
1810 * Finally we reach the core of the Launcher which runs the Guest, serves
1811 * its input and output, and finally, lays it to rest.
1812 */
1813static void __attribute__((noreturn)) run_guest(void)
1814{
1815 for (;;) {
1816 unsigned long notify_addr;
1817 int readval;
1818
1819 /* We read from the /dev/lguest device to run the Guest. */
1820 readval = pread(lguest_fd, &notify_addr,
1821 sizeof(notify_addr), cpu_id);
1822
1823 /* One unsigned long means the Guest did HCALL_NOTIFY */
1824 if (readval == sizeof(notify_addr)) {
1825 verbose("Notify on address %#lx\n", notify_addr);
1826 handle_output(notify_addr);
1827 /* ENOENT means the Guest died. Reading tells us why. */
1828 } else if (errno == ENOENT) {
1829 char reason[1024] = { 0 };
1830 pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
1831 errx(1, "%s", reason);
1832 /* ERESTART means that we need to reboot the guest */
1833 } else if (errno == ERESTART) {
1834 restart_guest();
1835 /* Anything else means a bug or incompatible change. */
1836 } else
1837 err(1, "Running guest failed");
1838 }
1839}
1840/*L:240
1841 * This is the end of the Launcher. The good news: we are over halfway
1842 * through! The bad news: the most fiendish part of the code still lies ahead
1843 * of us.
1844 *
1845 * Are you ready? Take a deep breath and join me in the core of the Host, in
1846 * "make Host".
1847:*/
1848
/*
 * Long options accepted by the Launcher, for getopt_long().  Each entry maps
 * a long name to the single character main() switches on; the table ends
 * with an all-zero entry.
 */
static struct option opts[] = {
	{ "verbose",  no_argument,       NULL, 'v' },
	{ "tunnet",   required_argument, NULL, 't' },
	{ "block",    required_argument, NULL, 'b' },
	{ "rng",      no_argument,       NULL, 'r' },
	{ "initrd",   required_argument, NULL, 'i' },
	{ "username", required_argument, NULL, 'u' },
	{ "chroot",   required_argument, NULL, 'c' },
	{ NULL, 0, NULL, 0 },
};
/*
 * Print the command-line synopsis and exit with status 1 (errx() does not
 * return).  The synopsis lists every long option in the option table,
 * including --rng, --username and --chroot.
 */
static void usage(void)
{
	errx(1, "Usage: lguest [--verbose] "
	     "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n"
	     "|--block=<filename>|--initrd=<filename>|--rng\n"
	     "|--username=<username>|--chroot=<path>]...\n"
	     "<mem-in-mb> vmlinux [args...]");
}
1866
1867/*L:105 The main routine is where the real work begins: */
1868int main(int argc, char *argv[])
1869{
1870 /* Memory, code startpoint and size of the (optional) initrd. */
1871 unsigned long mem = 0, start, initrd_size = 0;
1872 /* Two temporaries. */
1873 int i, c;
1874 /* The boot information for the Guest. */
1875 struct boot_params *boot;
1876 /* If they specify an initrd file to load. */
1877 const char *initrd_name = NULL;
1878
1879 /* Password structure for initgroups/setres[gu]id */
1880 struct passwd *user_details = NULL;
1881
1882 /* Directory to chroot to */
1883 char *chroot_path = NULL;
1884
1885 /* Save the args: we "reboot" by execing ourselves again. */
1886 main_args = argv;
1887
1888 /*
1889 * First we initialize the device list. We keep a pointer to the last
1890 * device, and the next interrupt number to use for devices (1:
1891 * remember that 0 is used by the timer).
1892 */
1893 devices.lastdev = NULL;
1894 devices.next_irq = 1;
1895
1896 /* We're CPU 0. In fact, that's the only CPU possible right now. */
1897 cpu_id = 0;
1898
1899 /*
1900 * We need to know how much memory so we can set up the device
1901 * descriptor and memory pages for the devices as we parse the command
1902 * line. So we quickly look through the arguments to find the amount
1903 * of memory now.
1904 */
1905 for (i = 1; i < argc; i++) {
1906 if (argv[i][0] != '-') {
1907 mem = atoi(argv[i]) * 1024 * 1024;
1908 /*
1909 * We start by mapping anonymous pages over all of
1910 * guest-physical memory range. This fills it with 0,
1911 * and ensures that the Guest won't be killed when it
1912 * tries to access it.
1913 */
1914 guest_base = map_zeroed_pages(mem / getpagesize()
1915 + DEVICE_PAGES);
1916 guest_limit = mem;
1917 guest_max = mem + DEVICE_PAGES*getpagesize();
1918 devices.descpage = get_pages(1);
1919 break;
1920 }
1921 }
1922
1923 /* The options are fairly straight-forward */
1924 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
1925 switch (c) {
1926 case 'v':
1927 verbose = true;
1928 break;
1929 case 't':
1930 setup_tun_net(optarg);
1931 break;
1932 case 'b':
1933 setup_block_file(optarg);
1934 break;
1935 case 'r':
1936 setup_rng();
1937 break;
1938 case 'i':
1939 initrd_name = optarg;
1940 break;
1941 case 'u':
1942 user_details = getpwnam(optarg);
1943 if (!user_details)
1944 err(1, "getpwnam failed, incorrect username?");
1945 break;
1946 case 'c':
1947 chroot_path = optarg;
1948 break;
1949 default:
1950 warnx("Unknown argument %s", argv[optind]);
1951 usage();
1952 }
1953 }
1954 /*
1955 * After the other arguments we expect memory and kernel image name,
1956 * followed by command line arguments for the kernel.
1957 */
1958 if (optind + 2 > argc)
1959 usage();
1960
1961 verbose("Guest base is at %p\n", guest_base);
1962
1963 /* We always have a console device */
1964 setup_console();
1965
1966 /* Now we load the kernel */
1967 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
1968
1969 /* Boot information is stashed at physical address 0 */
1970 boot = from_guest_phys(0);
1971
1972 /* Map the initrd image if requested (at top of physical memory) */
1973 if (initrd_name) {
1974 initrd_size = load_initrd(initrd_name, mem);
1975 /*
1976 * These are the location in the Linux boot header where the
1977 * start and size of the initrd are expected to be found.
1978 */
1979 boot->hdr.ramdisk_image = mem - initrd_size;
1980 boot->hdr.ramdisk_size = initrd_size;
1981 /* The bootloader type 0xFF means "unknown"; that's OK. */
1982 boot->hdr.type_of_loader = 0xFF;
1983 }
1984
1985 /*
1986 * The Linux boot header contains an "E820" memory map: ours is a
1987 * simple, single region.
1988 */
1989 boot->e820_entries = 1;
1990 boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM });
1991 /*
1992 * The boot header contains a command line pointer: we put the command
1993 * line after the boot header.
1994 */
1995 boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);
1996 /* We use a simple helper to copy the arguments separated by spaces. */
1997 concat((char *)(boot + 1), argv+optind+2);
1998
1999 /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
2000 boot->hdr.kernel_alignment = 0x1000000;
2001
2002 /* Boot protocol version: 2.07 supports the fields for lguest. */
2003 boot->hdr.version = 0x207;
2004
2005 /* The hardware_subarch value of "1" tells the Guest it's an lguest. */
2006 boot->hdr.hardware_subarch = 1;
2007
2008 /* Tell the entry path not to try to reload segment registers. */
2009 boot->hdr.loadflags |= KEEP_SEGMENTS;
2010
2011 /* We tell the kernel to initialize the Guest. */
2012 tell_kernel(start);
2013
2014 /* Ensure that we terminate if a device-servicing child dies. */
2015 signal(SIGCHLD, kill_launcher);
2016
2017 /* If we exit via err(), this kills all the threads, restores tty. */
2018 atexit(cleanup_devices);
2019
2020 /* If requested, chroot to a directory */
2021 if (chroot_path) {
2022 if (chroot(chroot_path) != 0)
2023 err(1, "chroot(\"%s\") failed", chroot_path);
2024
2025 if (chdir("/") != 0)
2026 err(1, "chdir(\"/\") failed");
2027
2028 verbose("chroot done\n");
2029 }
2030
2031 /* If requested, drop privileges */
2032 if (user_details) {
2033 uid_t u;
2034 gid_t g;
2035
2036 u = user_details->pw_uid;
2037 g = user_details->pw_gid;
2038
2039 if (initgroups(user_details->pw_name, g) != 0)
2040 err(1, "initgroups failed");
2041
2042 if (setresgid(g, g, g) != 0)
2043 err(1, "setresgid failed");
2044
2045 if (setresuid(u, u, u) != 0)
2046 err(1, "setresuid failed");
2047
2048 verbose("Dropping privileges completed\n");
2049 }
2050
2051 /* Finally, run the Guest. This doesn't return. */
2052 run_guest();
2053}
2054/*:*/
2055
2056/*M:999
2057 * Mastery is done: you now know everything I do.
2058 *
2059 * But surely you have seen code, features and bugs in your wanderings which
2060 * you now yearn to attack? That is the real game, and I look forward to you
2061 * patching and forking lguest into the Your-Name-Here-visor.
2062 *
2063 * Farewell, and good coding!
2064 * Rusty Russell.
2065 */
diff --git a/Documentation/virtual/lguest/lguest.txt b/Documentation/virtual/lguest/lguest.txt
deleted file mode 100644
index bff0c554485d..000000000000
--- a/Documentation/virtual/lguest/lguest.txt
+++ /dev/null
@@ -1,129 +0,0 @@
1 __
2 (___()'`; Rusty's Remarkably Unreliable Guide to Lguest
3 /, /` - or, A Young Coder's Illustrated Hypervisor
4 \\"--\\ http://lguest.ozlabs.org
5
6Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel,
7for Linux developers and users to experiment with virtualization with the
8minimum of complexity. Nonetheless, it should have sufficient features to
9make it useful for specific tasks, and, of course, you are encouraged to fork
10and enhance it (see drivers/lguest/README).
11
12Features:
13
14- Kernel module which runs in a normal kernel.
15- Simple I/O model for communication.
16- Simple program to create new guests.
17- Logo contains cute puppies: http://lguest.ozlabs.org
18
19Developer features:
20
21- Fun to hack on.
22- No ABI: being tied to a specific kernel anyway, you can change anything.
23- Many opportunities for improvement or feature implementation.
24
25Running Lguest:
26
27- The easiest way to run lguest is to use the same kernel as guest and host.
28 You can configure them differently, but usually it's easiest not to.
29
30 You will need to configure your kernel with the following options:
31
32 "General setup":
33 "Prompt for development and/or incomplete code/drivers" = Y
34 (CONFIG_EXPERIMENTAL=y)
35
36 "Processor type and features":
37 "Paravirtualized guest support" = Y
38 "Lguest guest support" = Y
39 "High Memory Support" = off/4GB
40 "Alignment value to which kernel should be aligned" = 0x100000
41 (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
42 CONFIG_PHYSICAL_ALIGN=0x100000)
43
44 "Device Drivers":
45 "Block devices"
46 "Virtio block driver (EXPERIMENTAL)" = M/Y
47 "Network device support"
48 "Universal TUN/TAP device driver support" = M/Y
49 "Virtio network driver (EXPERIMENTAL)" = M/Y
50 (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m)
51
52 "Virtualization"
53 "Linux hypervisor example code" = M/Y
54 (CONFIG_LGUEST=m)
55
56- A tool called "lguest" is available in this directory: type "make"
57 to build it. If you didn't build your kernel in-tree, use "make
58 O=<builddir>".
59
60- Create or find a root disk image. There are several useful ones
61 around, such as the xm-test tiny root image at
62 http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
63
64 For more serious work, I usually use a distribution ISO image and
65 install it under qemu, then make multiple copies:
66
67 dd if=/dev/zero of=rootfile bs=1M count=2048
68 qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
69
70 Make sure that you install a getty on /dev/hvc0 if you want to log in on the
71 console!
72
73- "modprobe lg" if you built it as a module.
74
75- Run an lguest as root:
76
77 Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
78 --block=rootfile root=/dev/vda
79
80 Explanation:
81 64: the amount of memory to use, in MB.
82
83 vmlinux: the kernel image found in the top of your build directory. You
84 can also use a standard bzImage.
85
86 --tunnet=192.168.19.1: configures a "tap" device for networking with this
87 IP address.
88
89 --block=rootfile: a file or block device which becomes /dev/vda
90 inside the guest.
91
92 root=/dev/vda: this (and anything else on the command line) are
93 kernel boot parameters.
94
95- Configuring networking. I usually have the host masquerade, using
96 "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
97 /proc/sys/net/ipv4/ip_forward". In this example, I would configure
98 eth0 inside the guest at 192.168.19.2.
99
100 Another method is to bridge the tap device to an external interface
101 using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
102 to obtain an IP address. The bridge needs to be configured first:
103 this option simply adds the tap interface to it.
104
105 A simple example on my system:
106
107 ifconfig eth0 0.0.0.0
108 brctl addbr lg0
109 ifconfig lg0 up
110 brctl addif lg0 eth0
111 dhclient lg0
112
113 Then use --tunnet=bridge:lg0 when launching the guest.
114
115 See:
116
117 http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
118
119 for general information on how to get bridging to work.
120
121- Random number generation. Using the --rng option will provide a
122 /dev/hwrng in the guest that will read from the host's /dev/random.
123 Use this option in conjunction with rng-tools (see ../hw_random.txt)
124 to provide entropy to the guest kernel's /dev/random.
125
126There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest
127
128Good luck!
129Rusty Russell rusty@rustcorp.com.au.
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index f464f47bc60d..6752870c4970 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
117 117
118slub_min_objects=x (default 4) 118slub_min_objects=x (default 4)
119slub_min_order=x (default 0) 119slub_min_order=x (default 0)
120slub_max_order=x (default 1) 120slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
121 121
122slub_min_objects allows to specify how many objects must at least fit 122slub_min_objects allows to specify how many objects must at least fit
123into one slab in order for the allocation order to be acceptable. 123into one slab in order for the allocation order to be acceptable.
@@ -131,7 +131,10 @@ slub_min_objects.
131slub_max_order specifies the order at which slub_min_objects should no 131slub_max_order specifies the order at which slub_min_objects should no
132longer be checked. This is useful to avoid SLUB trying to generate 132longer be checked. This is useful to avoid SLUB trying to generate
133super large order pages to fit slub_min_objects of a slab cache with 133super large order pages to fit slub_min_objects of a slab cache with
134large object sizes into one high order page. 134large object sizes into one high order page. Setting the command line
135parameter debug_guardpage_minorder=N (N > 0) forces
136slub_max_order to 0, which causes slabs to be allocated at the
137minimum possible order.
135 138
136SLUB Debug output 139SLUB Debug output
137----------------- 140-----------------
diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX
index fc51128071c2..fc9082a1477a 100644
--- a/Documentation/watchdog/00-INDEX
+++ b/Documentation/watchdog/00-INDEX
@@ -1,5 +1,7 @@
100-INDEX 100-INDEX
2 - this file. 2 - this file.
3convert_drivers_to_kernel_api.txt
4 - how-to for converting old watchdog drivers to the new kernel API.
3hpwdt.txt 5hpwdt.txt
4 - information on the HP iLO2 NMI watchdog 6 - information on the HP iLO2 NMI watchdog
5pcwd-watchdog.txt 7pcwd-watchdog.txt
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
index ae1e90036d06..be8119bb15d2 100644
--- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
@@ -163,6 +163,25 @@ Here is a simple example for a watchdog device:
163+}; 163+};
164 164
165 165
166Handle the 'nowayout' feature
167-----------------------------
168
169A few drivers use nowayout statically, i.e. there is no module parameter for it
170and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
171used. This needs to be converted by initializing the status variable of the
172watchdog_device like this:
173
174 .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
175
176Most drivers, however, also allow runtime configuration of nowayout, usually
177by adding a module parameter. The conversion for this would be something like:
178
179 watchdog_set_nowayout(&s3c2410_wdd, nowayout);
180
181The module parameter itself needs to stay; everything else related to nowayout
182can go, though. This will likely be some code in open(), close() or write().
183
184
166Register the watchdog device 185Register the watchdog device
167---------------------------- 186----------------------------
168 187
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
index 4f7c894244d2..4b93c28e35c6 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -1,6 +1,6 @@
1The Linux WatchDog Timer Driver Core kernel API. 1The Linux WatchDog Timer Driver Core kernel API.
2=============================================== 2===============================================
3Last reviewed: 22-Jul-2011 3Last reviewed: 29-Nov-2011
4 4
5Wim Van Sebroeck <wim@iguana.be> 5Wim Van Sebroeck <wim@iguana.be>
6 6
@@ -142,6 +142,14 @@ bit-operations. The status bits that are defined are:
142* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog. 142* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
143 If this bit is set then the watchdog timer will not be able to stop. 143 If this bit is set then the watchdog timer will not be able to stop.
144 144
145 To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
146 timer device) you can either:
147 * set it statically in your watchdog_device struct with
148 .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
149 (this will set the bit to the same value as CONFIG_WATCHDOG_NOWAYOUT) or
150 * use the following helper function:
151 static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
152
145Note: The WatchDog Timer Driver Core supports the magic close feature and 153Note: The WatchDog Timer Driver Core supports the magic close feature and
146the nowayout feature. To use the magic close feature you must set the 154the nowayout feature. To use the magic close feature you must set the
147WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure. 155WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.