diff options
Diffstat (limited to 'Documentation')
132 files changed, 4554 insertions, 2745 deletions
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend new file mode 100644 index 000000000000..3d5951c8bf5f --- /dev/null +++ b/Documentation/ABI/stable/sysfs-bus-xen-backend | |||
@@ -0,0 +1,75 @@ | |||
1 | What: /sys/bus/xen-backend/devices/*/devtype | ||
2 | Date: Feb 2009 | ||
3 | KernelVersion: 2.6.38 | ||
4 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
5 | Description: | ||
6 | The type of the device. e.g., one of: 'vbd' (block), | ||
7 | 'vif' (network), or 'vfb' (framebuffer). | ||
8 | |||
9 | What: /sys/bus/xen-backend/devices/*/nodename | ||
10 | Date: Feb 2009 | ||
11 | KernelVersion: 2.6.38 | ||
12 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
13 | Description: | ||
14 | XenStore node (under /local/domain/NNN/) for this | ||
15 | backend device. | ||
16 | |||
17 | What: /sys/bus/xen-backend/devices/vbd-*/physical_device | ||
18 | Date: April 2011 | ||
19 | KernelVersion: 3.0 | ||
20 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
21 | Description: | ||
22 | The major:minor number (in hexadecimal) of the | ||
23 | physical device providing the storage for this backend | ||
24 | block device. | ||
25 | |||
26 | What: /sys/bus/xen-backend/devices/vbd-*/mode | ||
27 | Date: April 2011 | ||
28 | KernelVersion: 3.0 | ||
29 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
30 | Description: | ||
31 | Whether the block device is read-only ('r') or | ||
32 | read-write ('w'). | ||
33 | |||
34 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/f_req | ||
35 | Date: April 2011 | ||
36 | KernelVersion: 3.0 | ||
37 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
38 | Description: | ||
39 | Number of flush requests from the frontend. | ||
40 | |||
41 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/oo_req | ||
42 | Date: April 2011 | ||
43 | KernelVersion: 3.0 | ||
44 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
45 | Description: | ||
46 | Number of requests delayed because the backend was too | ||
47 | busy processing previous requests. | ||
48 | |||
49 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_req | ||
50 | Date: April 2011 | ||
51 | KernelVersion: 3.0 | ||
52 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
53 | Description: | ||
54 | Number of read requests from the frontend. | ||
55 | |||
56 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_sect | ||
57 | Date: April 2011 | ||
58 | KernelVersion: 3.0 | ||
59 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
60 | Description: | ||
61 | Number of sectors read by the frontend. | ||
62 | |||
63 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_req | ||
64 | Date: April 2011 | ||
65 | KernelVersion: 3.0 | ||
66 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
67 | Description: | ||
68 | Number of write requests from the frontend. | ||
69 | |||
70 | What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_sect | ||
71 | Date: April 2011 | ||
72 | KernelVersion: 3.0 | ||
73 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
74 | Description: | ||
75 | Number of sectors written by the frontend. | ||
diff --git a/Documentation/ABI/stable/sysfs-devices-system-xen_memory b/Documentation/ABI/stable/sysfs-devices-system-xen_memory new file mode 100644 index 000000000000..caa311d59ac1 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-devices-system-xen_memory | |||
@@ -0,0 +1,77 @@ | |||
1 | What: /sys/devices/system/xen_memory/xen_memory0/max_retry_count | ||
2 | Date: May 2011 | ||
3 | KernelVersion: 2.6.39 | ||
4 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
5 | Description: | ||
6 | The maximum number of times the balloon driver will | ||
7 | attempt to increase the balloon before giving up. See | ||
8 | also 'retry_count' below. | ||
9 | A value of zero means retry forever and is the default one. | ||
10 | |||
11 | What: /sys/devices/system/xen_memory/xen_memory0/max_schedule_delay | ||
12 | Date: May 2011 | ||
13 | KernelVersion: 2.6.39 | ||
14 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
15 | Description: | ||
16 | The limit that 'schedule_delay' (see below) will be | ||
17 | increased to. The default value is 32 seconds. | ||
18 | |||
19 | What: /sys/devices/system/xen_memory/xen_memory0/retry_count | ||
20 | Date: May 2011 | ||
21 | KernelVersion: 2.6.39 | ||
22 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
23 | Description: | ||
24 | The current number of times that the balloon driver | ||
25 | has attempted to increase the size of the balloon. | ||
26 | The default value is one. With max_retry_count being | ||
27 | zero (unlimited), this means that the driver will attempt | ||
28 | to retry with a 'schedule_delay' delay. | ||
29 | |||
30 | What: /sys/devices/system/xen_memory/xen_memory0/schedule_delay | ||
31 | Date: May 2011 | ||
32 | KernelVersion: 2.6.39 | ||
33 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
34 | Description: | ||
35 | The time (in seconds) to wait between attempts to | ||
36 | increase the balloon. Each time the balloon cannot be | ||
37 | increased, 'schedule_delay' is increased (until | ||
38 | 'max_schedule_delay' is reached at which point it | ||
39 | will use the max value). | ||
40 | |||
41 | What: /sys/devices/system/xen_memory/xen_memory0/target | ||
42 | Date: April 2008 | ||
43 | KernelVersion: 2.6.26 | ||
44 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
45 | Description: | ||
46 | The target number of pages to adjust this domain's | ||
47 | memory reservation to. | ||
48 | |||
49 | What: /sys/devices/system/xen_memory/xen_memory0/target_kb | ||
50 | Date: April 2008 | ||
51 | KernelVersion: 2.6.26 | ||
52 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
53 | Description: | ||
54 | As target above, except the value is in KiB. | ||
55 | |||
56 | What: /sys/devices/system/xen_memory/xen_memory0/info/current_kb | ||
57 | Date: April 2008 | ||
58 | KernelVersion: 2.6.26 | ||
59 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
60 | Description: | ||
61 | Current size (in KiB) of this domain's memory | ||
62 | reservation. | ||
63 | |||
64 | What: /sys/devices/system/xen_memory/xen_memory0/info/high_kb | ||
65 | Date: April 2008 | ||
66 | KernelVersion: 2.6.26 | ||
67 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
68 | Description: | ||
69 | Amount (in KiB) of high memory in the balloon. | ||
70 | |||
71 | What: /sys/devices/system/xen_memory/xen_memory0/info/low_kb | ||
72 | Date: April 2008 | ||
73 | KernelVersion: 2.6.26 | ||
74 | Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
75 | Description: | ||
76 | Amount (in KiB) of low (or normal) memory in the | ||
77 | balloon. | ||
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 349ecf26ce10..34f51100f029 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci | |||
@@ -66,6 +66,24 @@ Description: | |||
66 | re-discover previously removed devices. | 66 | re-discover previously removed devices. |
67 | Depends on CONFIG_HOTPLUG. | 67 | Depends on CONFIG_HOTPLUG. |
68 | 68 | ||
69 | What: /sys/bus/pci/devices/.../msi_irqs/ | ||
70 | Date: September, 2011 | ||
71 | Contact: Neil Horman <nhorman@tuxdriver.com> | ||
72 | Description: | ||
73 | The /sys/devices/.../msi_irqs directory contains a variable set | ||
74 | of sub-directories, with each sub-directory being named after a | ||
75 | corresponding msi irq vector allocated to that device. Each | ||
76 | numbered sub-directory N contains attributes of that irq. | ||
77 | Note that this directory is not created for device drivers which | ||
78 | do not support msi irqs. | ||
79 | |||
80 | What: /sys/bus/pci/devices/.../msi_irqs/<N>/mode | ||
81 | Date: September 2011 | ||
82 | Contact: Neil Horman <nhorman@tuxdriver.com> | ||
83 | Description: | ||
84 | This attribute indicates the mode that the irq vector named by | ||
85 | the parent directory is in (msi vs. msix). | ||
86 | |||
69 | What: /sys/bus/pci/devices/.../remove | 87 | What: /sys/bus/pci/devices/.../remove |
70 | Date: January 2009 | 88 | Date: January 2009 |
71 | Contact: Linux PCI developers <linux-pci@vger.kernel.org> | 89 | Contact: Linux PCI developers <linux-pci@vger.kernel.org> |
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index e647378e9e88..b4f548792e32 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb | |||
@@ -119,6 +119,31 @@ Description: | |||
119 | Write a 1 to force the device to disconnect | 119 | Write a 1 to force the device to disconnect |
120 | (equivalent to unplugging a wired USB device). | 120 | (equivalent to unplugging a wired USB device). |
121 | 121 | ||
122 | What: /sys/bus/usb/drivers/.../new_id | ||
123 | Date: October 2011 | ||
124 | Contact: linux-usb@vger.kernel.org | ||
125 | Description: | ||
126 | Writing a device ID to this file will attempt to | ||
127 | dynamically add a new device ID to a USB device driver. | ||
128 | This may allow the driver to support more hardware than | ||
129 | was included in the driver's static device ID support | ||
130 | table at compile time. The format for the device ID is: | ||
131 | idVendor idProduct bInterfaceClass. | ||
132 | The vendor ID and device ID fields are required, the | ||
133 | interface class is optional. | ||
134 | Upon successfully adding an ID, the driver will probe | ||
135 | for the device and attempt to bind to it. For example: | ||
136 | # echo "8086 10f5" > /sys/bus/usb/drivers/foo/new_id | ||
137 | |||
138 | What: /sys/bus/usb-serial/drivers/.../new_id | ||
139 | Date: October 2011 | ||
140 | Contact: linux-usb@vger.kernel.org | ||
141 | Description: | ||
142 | For serial USB drivers, this attribute appears under the | ||
143 | extra bus folder "usb-serial" in sysfs; apart from that | ||
144 | difference, all descriptions from the entry | ||
145 | "/sys/bus/usb/drivers/.../new_id" apply. | ||
146 | |||
122 | What: /sys/bus/usb/drivers/.../remove_id | 147 | What: /sys/bus/usb/drivers/.../remove_id |
123 | Date: November 2009 | 148 | Date: November 2009 |
124 | Contact: CHENG Renquan <rqcheng@smu.edu.sg> | 149 | Contact: CHENG Renquan <rqcheng@smu.edu.sg> |
diff --git a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration new file mode 100644 index 000000000000..4cf1e72222d9 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration | |||
@@ -0,0 +1,12 @@ | |||
1 | What: Attribute for calibrating ST-Ericsson AB8500 Real Time Clock | ||
2 | Date: Oct 2011 | ||
3 | KernelVersion: 3.0 | ||
4 | Contact: Mark Godfrey <mark.godfrey@stericsson.com> | ||
5 | Description: The rtc_calibration attribute allows the userspace to | ||
6 | calibrate the AB8500's 32KHz Real Time Clock. | ||
7 | Every 60 seconds the AB8500 will correct the RTC's value | ||
8 | by adding to it the value of this attribute. | ||
9 | The range of the attribute is -127 to +127 in units of | ||
10 | 30.5 micro-seconds (half-parts-per-million of the 32KHz clock) | ||
11 | Users: The /vendor/st-ericsson/base_utilities/core/rtc_calibration | ||
12 | daemon uses this interface. | ||
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-docg3 b/Documentation/ABI/testing/sysfs-devices-platform-docg3 new file mode 100644 index 000000000000..8aa36716882f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-platform-docg3 | |||
@@ -0,0 +1,34 @@ | |||
1 | What: /sys/devices/platform/docg3/f[0-3]_dps[01]_is_keylocked | ||
2 | Date: November 2011 | ||
3 | KernelVersion: 3.3 | ||
4 | Contact: Robert Jarzmik <robert.jarzmik@free.fr> | ||
5 | Description: | ||
6 | Show whether the floor (0 to 3), protection area (0 or 1) is | ||
7 | keylocked. Each docg3 chip (or floor) has 2 protection areas, | ||
8 | which can cover any part of it, block aligned, called DPS. | ||
9 | The protection has information embedded whether it blocks reads, | ||
10 | writes or both. | ||
11 | The result is: | ||
12 | 0 -> the DPS is not keylocked | ||
13 | 1 -> the DPS is keylocked | ||
14 | Users: None identified so far. | ||
15 | |||
16 | What: /sys/devices/platform/docg3/f[0-3]_dps[01]_protection_key | ||
17 | Date: November 2011 | ||
18 | KernelVersion: 3.3 | ||
19 | Contact: Robert Jarzmik <robert.jarzmik@free.fr> | ||
20 | Description: | ||
21 | Enter the protection key for the floor (0 to 3), protection area | ||
22 | (0 or 1). Each docg3 chip (or floor) has 2 protection areas, | ||
23 | which can cover any part of it, block aligned, called DPS. | ||
24 | The protection has information embedded whether it blocks reads, | ||
25 | writes or both. | ||
26 | The protection key is a string of 8 bytes (value 0-255). | ||
27 | Entering the correct value toggles the lock, and can be observed | ||
28 | through f[0-3]_dps[01]_is_keylocked. | ||
29 | Possible values are: | ||
30 | - 8 bytes | ||
31 | Typical values are: | ||
32 | - "00000000" | ||
33 | - "12345678" | ||
34 | Users: None identified so far. | ||
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff index 9aec8ef228b0..167d9032b970 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff +++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff | |||
@@ -1,7 +1,7 @@ | |||
1 | What: /sys/module/hid_logitech/drivers/hid:logitech/<dev>/range. | 1 | What: /sys/module/hid_logitech/drivers/hid:logitech/<dev>/range. |
2 | Date: July 2011 | 2 | Date: July 2011 |
3 | KernelVersion: 3.2 | 3 | KernelVersion: 3.2 |
4 | Contact: Michal Malý <madcatxster@gmail.com> | 4 | Contact: Michal Malý <madcatxster@gmail.com> |
5 | Description: Display minimum, maximum and current range of the steering | 5 | Description: Display minimum, maximum and current range of the steering |
6 | wheel. Writing a value within min and max boundaries sets the | 6 | wheel. Writing a value within min and max boundaries sets the |
7 | range of the wheel. | 7 | range of the wheel. |
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-multitouch b/Documentation/ABI/testing/sysfs-driver-hid-multitouch new file mode 100644 index 000000000000..f79839d1af37 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-hid-multitouch | |||
@@ -0,0 +1,9 @@ | |||
1 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/quirks | ||
2 | Date: November 2011 | ||
3 | Contact: Benjamin Tissoires <benjamin.tissoires@gmail.com> | ||
4 | Description: The integer value of this attribute corresponds to the | ||
5 | quirks actually in place to handle the device's protocol. | ||
6 | When read, this attribute returns the current settings (see | ||
7 | MT_QUIRKS_* in hid-multitouch.c). | ||
8 | When written, this attribute changes the quirks on the fly, and | ||
9 | thereby the protocol used to handle the device. | ||
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku new file mode 100644 index 000000000000..189dc43891bf --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku | |||
@@ -0,0 +1,135 @@ | |||
1 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/actual_profile | ||
2 | Date: June 2011 | ||
3 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
4 | Description: The integer value of this attribute ranges from 0-4. | ||
5 | When read, this attribute returns the number of the actual | ||
6 | profile. This value is persistent, so it's equivalent to the | ||
7 | profile that's active when the device is powered on next time. | ||
8 | When written, this file sets the number of the startup profile | ||
9 | and the device activates this profile immediately. | ||
10 | Users: http://roccat.sourceforge.net | ||
11 | |||
12 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/info | ||
13 | Date: June 2011 | ||
14 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
15 | Description: When read, this file returns general data like firmware version. | ||
16 | The data is 6 bytes long. | ||
17 | This file is readonly. | ||
18 | Users: http://roccat.sourceforge.net | ||
19 | |||
20 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/key_mask | ||
21 | Date: June 2011 | ||
22 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
23 | Description: When written, this file lets one deactivate certain keys like | ||
24 | windows and application keys, to prevent accidental presses. | ||
25 | Profile number for which these settings apply is included in | ||
26 | written data. The data has to be 6 bytes long. | ||
27 | Before reading this file, control has to be written to select | ||
28 | which profile to read. | ||
29 | Users: http://roccat.sourceforge.net | ||
30 | |||
31 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_capslock | ||
32 | Date: June 2011 | ||
33 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
34 | Description: When written, this file lets one set the function of the | ||
35 | capslock key for a specific profile. Profile number is included | ||
36 | in written data. The data has to be 6 bytes long. | ||
37 | Before reading this file, control has to be written to select | ||
38 | which profile to read. | ||
39 | Users: http://roccat.sourceforge.net | ||
40 | |||
41 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_easyzone | ||
42 | Date: June 2011 | ||
43 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
44 | Description: When written, this file lets one set the function of the | ||
45 | easyzone keys for a specific profile. Profile number is included | ||
46 | in written data. The data has to be 65 bytes long. | ||
47 | Before reading this file, control has to be written to select | ||
48 | which profile to read. | ||
49 | Users: http://roccat.sourceforge.net | ||
50 | |||
51 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_function | ||
52 | Date: June 2011 | ||
53 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
54 | Description: When written, this file lets one set the function of the | ||
55 | function keys for a specific profile. Profile number is included | ||
56 | in written data. The data has to be 41 bytes long. | ||
57 | Before reading this file, control has to be written to select | ||
58 | which profile to read. | ||
59 | Users: http://roccat.sourceforge.net | ||
60 | |||
61 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_macro | ||
62 | Date: June 2011 | ||
63 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
64 | Description: When written, this file lets one set the function of the macro | ||
65 | keys for a specific profile. Profile number is included in | ||
66 | written data. The data has to be 35 bytes long. | ||
67 | Before reading this file, control has to be written to select | ||
68 | which profile to read. | ||
69 | Users: http://roccat.sourceforge.net | ||
70 | |||
71 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_media | ||
72 | Date: June 2011 | ||
73 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
74 | Description: When written, this file lets one set the function of the media | ||
75 | keys for a specific profile. Profile number is included in | ||
76 | written data. The data has to be 29 bytes long. | ||
77 | Before reading this file, control has to be written to select | ||
78 | which profile to read. | ||
79 | Users: http://roccat.sourceforge.net | ||
80 | |||
81 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_thumbster | ||
82 | Date: June 2011 | ||
83 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
84 | Description: When written, this file lets one set the function of the | ||
85 | thumbster keys for a specific profile. Profile number is included | ||
86 | in written data. The data has to be 23 bytes long. | ||
87 | Before reading this file, control has to be written to select | ||
88 | which profile to read. | ||
89 | Users: http://roccat.sourceforge.net | ||
90 | |||
91 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/last_set | ||
92 | Date: June 2011 | ||
93 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
94 | Description: When written, this file lets one set the time in secs since | ||
95 | epoch in which the last configuration took place. | ||
96 | The data has to be 20 bytes long. | ||
97 | Users: http://roccat.sourceforge.net | ||
98 | |||
99 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/light | ||
100 | Date: June 2011 | ||
101 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
102 | Description: When written, this file lets one set the backlight intensity for | ||
103 | a specific profile. Profile number is included in written data. | ||
104 | The data has to be 10 bytes long. | ||
105 | Before reading this file, control has to be written to select | ||
106 | which profile to read. | ||
107 | Users: http://roccat.sourceforge.net | ||
108 | |||
109 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/macro | ||
110 | Date: June 2011 | ||
111 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
112 | Description: When written, this file lets one store macros with max 500 | ||
113 | keystrokes for a specific button for a specific profile. | ||
114 | Button and profile numbers are included in written data. | ||
115 | The data has to be 2083 bytes long. | ||
116 | Before reading this file, control has to be written to select | ||
117 | which profile and key to read. | ||
118 | Users: http://roccat.sourceforge.net | ||
119 | |||
120 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/control | ||
121 | Date: June 2011 | ||
122 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
123 | Description: When written, this file lets one select which data from which | ||
124 | profile will be read next. The data has to be 3 bytes long. | ||
125 | This file is writeonly. | ||
126 | Users: http://roccat.sourceforge.net | ||
127 | |||
128 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/talk | ||
129 | Date: June 2011 | ||
130 | Contact: Stefan Achatz <erazor_de@users.sourceforge.net> | ||
131 | Description: When written, this file lets one trigger easyshift functionality | ||
132 | from the host. | ||
133 | The data has to be 16 bytes long. | ||
134 | This file is writeonly. | ||
135 | Users: http://roccat.sourceforge.net | ||
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote index 5d5a16ea57c6..3d98009f447a 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-wiimote +++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote | |||
@@ -8,3 +8,15 @@ Contact: David Herrmann <dh.herrmann@googlemail.com> | |||
8 | Description: Make it possible to set/get current led state. Reading from it | 8 | Description: Make it possible to set/get current led state. Reading from it |
9 | returns 0 if led is off and 1 if it is on. Writing 0 to it | 9 | returns 0 if led is off and 1 if it is on. Writing 0 to it |
10 | disables the led, writing 1 enables it. | 10 | disables the led, writing 1 enables it. |
11 | |||
12 | What: /sys/bus/hid/drivers/wiimote/<dev>/extension | ||
13 | Date: August 2011 | ||
14 | KernelVersion: 3.2 | ||
15 | Contact: David Herrmann <dh.herrmann@googlemail.com> | ||
16 | Description: This file contains the currently connected and initialized | ||
17 | extensions. It can be one of: none, motionp, nunchuck, classic, | ||
18 | motionp+nunchuck, motionp+classic | ||
19 | motionp is the official Nintendo Motion+ extension, nunchuck is | ||
20 | the official Nintendo Nunchuck extension and classic is the | ||
21 | Nintendo Classic Controller extension. The motionp extension can | ||
22 | be combined with the other two. | ||
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom index 82d4df136444..0130d6683c14 100644 --- a/Documentation/ABI/testing/sysfs-driver-wacom +++ b/Documentation/ABI/testing/sysfs-driver-wacom | |||
@@ -15,9 +15,9 @@ Contact: linux-input@vger.kernel.org | |||
15 | Description: | 15 | Description: |
16 | Attribute group for control of the status LEDs and the OLEDs. | 16 | Attribute group for control of the status LEDs and the OLEDs. |
17 | This attribute group is only available for Intuos 4 M, L, | 17 | This attribute group is only available for Intuos 4 M, L, |
18 | and XL (with LEDs and OLEDs) and Cintiq 21UX2 (LEDs only). | 18 | and XL (with LEDs and OLEDs) and Cintiq 21UX2 and Cintiq 24HD |
19 | Therefore its presence implicitly signifies the presence of | 19 | (LEDs only). Therefore its presence implicitly signifies the |
20 | said LEDs and OLEDs on the tablet device. | 20 | presence of said LEDs and OLEDs on the tablet device. |
21 | 21 | ||
22 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status0_luminance | 22 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status0_luminance |
23 | Date: August 2011 | 23 | Date: August 2011 |
@@ -41,16 +41,17 @@ Date: August 2011 | |||
41 | Contact: linux-input@vger.kernel.org | 41 | Contact: linux-input@vger.kernel.org |
42 | Description: | 42 | Description: |
43 | Writing to this file sets which one of the four (for Intuos 4) | 43 | Writing to this file sets which one of the four (for Intuos 4) |
44 | or of the right four (for Cintiq 21UX2) status LEDs is active (0..3). | 44 | or of the right four (for Cintiq 21UX2 and Cintiq 24HD) status |
45 | The other three LEDs on the same side are always inactive. | 45 | LEDs is active (0..3). The other three LEDs on the same side are |
46 | always inactive. | ||
46 | 47 | ||
47 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led1_select | 48 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/status_led1_select |
48 | Date: September 2011 | 49 | Date: September 2011 |
49 | Contact: linux-input@vger.kernel.org | 50 | Contact: linux-input@vger.kernel.org |
50 | Description: | 51 | Description: |
51 | Writing to this file sets which one of the left four (for Cintiq 21UX2) | 52 | Writing to this file sets which one of the left four (for Cintiq 21UX2 |
52 | status LEDs is active (0..3). The other three LEDs on the left are always | 53 | and Cintiq 24HD) status LEDs is active (0..3). The other three LEDs on |
53 | inactive. | 54 | the left are always inactive. |
54 | 55 | ||
55 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/buttons_luminance | 56 | What: /sys/bus/usb/devices/<busnum>-<devnum>:<cfg>.<intf>/wacom_led/buttons_luminance |
56 | Date: August 2011 | 57 | Date: August 2011 |
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab index 8b093f8222d3..91bd6ca5440f 100644 --- a/Documentation/ABI/testing/sysfs-kernel-slab +++ b/Documentation/ABI/testing/sysfs-kernel-slab | |||
@@ -346,6 +346,10 @@ Description: | |||
346 | number of objects per slab. If a slab cannot be allocated | 346 | number of objects per slab. If a slab cannot be allocated |
347 | because of fragmentation, SLUB will retry with the minimum order | 347 | because of fragmentation, SLUB will retry with the minimum order |
348 | possible depending on its characteristics. | 348 | possible depending on its characteristics. |
349 | When debug_guardpage_minorder=N (N > 0) parameter is specified | ||
350 | (see Documentation/kernel-parameters.txt), the minimum possible | ||
351 | order is used and this sysfs entry can not be used to change | ||
352 | the order at run time. | ||
349 | 353 | ||
350 | What: /sys/kernel/slab/cache/order_fallback | 354 | What: /sys/kernel/slab/cache/order_fallback |
351 | Date: April 2008 | 355 | Date: April 2008 |
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl index 08ff908aa7a2..24979f691e3e 100644 --- a/Documentation/DocBook/debugobjects.tmpl +++ b/Documentation/DocBook/debugobjects.tmpl | |||
@@ -96,6 +96,7 @@ | |||
96 | <listitem><para>debug_object_deactivate</para></listitem> | 96 | <listitem><para>debug_object_deactivate</para></listitem> |
97 | <listitem><para>debug_object_destroy</para></listitem> | 97 | <listitem><para>debug_object_destroy</para></listitem> |
98 | <listitem><para>debug_object_free</para></listitem> | 98 | <listitem><para>debug_object_free</para></listitem> |
99 | <listitem><para>debug_object_assert_init</para></listitem> | ||
99 | </itemizedlist> | 100 | </itemizedlist> |
100 | Each of these functions takes the address of the real object and | 101 | Each of these functions takes the address of the real object and |
101 | a pointer to the object type specific debug description | 102 | a pointer to the object type specific debug description |
@@ -273,6 +274,26 @@ | |||
273 | debug checks. | 274 | debug checks. |
274 | </para> | 275 | </para> |
275 | </sect1> | 276 | </sect1> |
277 | |||
278 | <sect1 id="debug_object_assert_init"> | ||
279 | <title>debug_object_assert_init</title> | ||
280 | <para> | ||
281 | This function is called to assert that an object has been | ||
282 | initialized. | ||
283 | </para> | ||
284 | <para> | ||
285 | When the real object is not tracked by debugobjects, it calls | ||
286 | fixup_assert_init of the object type description structure | ||
287 | provided by the caller, with the hardcoded object state | ||
288 | ODEBUG_STATE_NOTAVAILABLE. The fixup function can correct the problem | ||
289 | by calling debug_object_init and other specific initializing | ||
290 | functions. | ||
291 | </para> | ||
292 | <para> | ||
293 | When the real object is already tracked by debugobjects it is | ||
294 | ignored. | ||
295 | </para> | ||
296 | </sect1> | ||
276 | </chapter> | 297 | </chapter> |
277 | <chapter id="fixupfunctions"> | 298 | <chapter id="fixupfunctions"> |
278 | <title>Fixup functions</title> | 299 | <title>Fixup functions</title> |
@@ -381,6 +402,35 @@ | |||
381 | statistics. | 402 | statistics. |
382 | </para> | 403 | </para> |
383 | </sect1> | 404 | </sect1> |
405 | <sect1 id="fixup_assert_init"> | ||
406 | <title>fixup_assert_init</title> | ||
407 | <para> | ||
408 | This function is called from the debug code whenever a problem | ||
409 | in debug_object_assert_init is detected. | ||
410 | </para> | ||
411 | <para> | ||
412 | Called from debug_object_assert_init() with a hardcoded state | ||
413 | ODEBUG_STATE_NOTAVAILABLE when the object is not found in the | ||
414 | debug bucket. | ||
415 | </para> | ||
416 | <para> | ||
417 | The function returns 1 when the fixup was successful, | ||
418 | otherwise 0. The return value is used to update the | ||
419 | statistics. | ||
420 | </para> | ||
421 | <para> | ||
422 | Note, this function should make sure debug_object_init() is | ||
423 | called before returning. | ||
424 | </para> | ||
425 | <para> | ||
426 | The handling of statically initialized objects is a special | ||
427 | case. The fixup function should check if this is a legitimate | ||
428 | case of a statically initialized object or not. In this case only | ||
429 | debug_object_init() should be called to make the object known to | ||
430 | the tracker. Then the function should return 0 because this is not | ||
431 | a real fixup. | ||
432 | </para> | ||
433 | </sect1> | ||
384 | </chapter> | 434 | </chapter> |
385 | <chapter id="bugs"> | 435 | <chapter id="bugs"> |
386 | <title>Known Bugs And Assumptions</title> | 436 | <title>Known Bugs And Assumptions</title> |
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl index 5de23c007078..cab4ec58e46e 100644 --- a/Documentation/DocBook/writing-an-alsa-driver.tmpl +++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl | |||
@@ -404,7 +404,7 @@ | |||
404 | /* SNDRV_CARDS: maximum number of cards supported by this module */ | 404 | /* SNDRV_CARDS: maximum number of cards supported by this module */ |
405 | static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; | 405 | static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; |
406 | static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; | 406 | static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; |
407 | static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; | 407 | static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; |
408 | 408 | ||
409 | /* definition of the chip-specific record */ | 409 | /* definition of the chip-specific record */ |
410 | struct mychip { | 410 | struct mychip { |
diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 81bc1a9ab9d8..f7ade3b3b40d 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO | |||
@@ -275,8 +275,8 @@ versions. | |||
275 | If no 2.6.x.y kernel is available, then the highest numbered 2.6.x | 275 | If no 2.6.x.y kernel is available, then the highest numbered 2.6.x |
276 | kernel is the current stable kernel. | 276 | kernel is the current stable kernel. |
277 | 277 | ||
278 | 2.6.x.y are maintained by the "stable" team <stable@kernel.org>, and are | 278 | 2.6.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and |
279 | released as needs dictate. The normal release period is approximately | 279 | are released as needs dictate. The normal release period is approximately |
280 | two weeks, but it can be longer if there are no pressing problems. A | 280 | two weeks, but it can be longer if there are no pressing problems. A |
281 | security-related problem, instead, can cause a release to happen almost | 281 | security-related problem, instead, can cause a release to happen almost |
282 | instantly. | 282 | instantly. |
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 0c134f8afc6f..bff2d8be1e18 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -328,6 +328,12 @@ over a rather long period of time, but improvements are always welcome! | |||
328 | RCU rather than SRCU, because RCU is almost always faster and | 328 | RCU rather than SRCU, because RCU is almost always faster and |
329 | easier to use than is SRCU. | 329 | easier to use than is SRCU. |
330 | 330 | ||
331 | If you need to enter your read-side critical section in a | ||
332 | hardirq or exception handler, and then exit that same read-side | ||
333 | critical section in the task that was interrupted, then you need | ||
334 | to use srcu_read_lock_raw() and srcu_read_unlock_raw(), which avoid | ||
335 | the lockdep checking that would otherwise make this practice illegal. | ||
336 | |||
331 | Also unlike other forms of RCU, explicit initialization | 337 | Also unlike other forms of RCU, explicit initialization |
332 | and cleanup is required via init_srcu_struct() and | 338 | and cleanup is required via init_srcu_struct() and |
333 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" | 339 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" |
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 31852705b586..bf778332a28f 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -38,11 +38,11 @@ o How can the updater tell when a grace period has completed | |||
38 | 38 | ||
39 | Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the | 39 | Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the |
40 | same effect, but require that the readers manipulate CPU-local | 40 | same effect, but require that the readers manipulate CPU-local |
41 | counters. These counters allow limited types of blocking | 41 | counters. These counters allow limited types of blocking within |
42 | within RCU read-side critical sections. SRCU also uses | 42 | RCU read-side critical sections. SRCU also uses CPU-local |
43 | CPU-local counters, and permits general blocking within | 43 | counters, and permits general blocking within RCU read-side |
44 | RCU read-side critical sections. These two variants of | 44 | critical sections. These variants of RCU detect grace periods |
45 | RCU detect grace periods by sampling these counters. | 45 | by sampling these counters. |
46 | 46 | ||
47 | o If I am running on a uniprocessor kernel, which can only do one | 47 | o If I am running on a uniprocessor kernel, which can only do one |
48 | thing at a time, why should I wait for a grace period? | 48 | thing at a time, why should I wait for a grace period? |
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 4e959208f736..083d88cbc089 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt | |||
@@ -101,6 +101,11 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | |||
101 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning | 101 | CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning |
102 | messages. | 102 | messages. |
103 | 103 | ||
104 | o A hardware or software issue shuts off the scheduler-clock | ||
105 | interrupt on a CPU that is not in dyntick-idle mode. This | ||
106 | problem really has happened, and seems to be most likely to | ||
107 | result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. | ||
108 | |||
104 | o A bug in the RCU implementation. | 109 | o A bug in the RCU implementation. |
105 | 110 | ||
106 | o A hardware failure. This is quite unlikely, but has occurred | 111 | o A hardware failure. This is quite unlikely, but has occurred |
@@ -109,12 +114,11 @@ o A hardware failure. This is quite unlikely, but has occurred | |||
109 | This resulted in a series of RCU CPU stall warnings, eventually | 114 | This resulted in a series of RCU CPU stall warnings, eventually |
110 | leading to the realization that the CPU had failed. | 115 | leading to the realization that the CPU had failed. |
111 | 116 | ||
112 | The RCU, RCU-sched, and RCU-bh implementations have CPU stall | 117 | The RCU, RCU-sched, and RCU-bh implementations have CPU stall warnings. |
113 | warnings. SRCU does not have its own CPU stall warnings, but its | 118 | SRCU does not have its own CPU stall warnings, but its calls to |
114 | calls to synchronize_sched() will result in RCU-sched detecting | 119 | synchronize_sched() will result in RCU-sched detecting RCU-sched-related |
115 | RCU-sched-related CPU stalls. Please note that RCU only detects | 120 | CPU stalls. Please note that RCU only detects CPU stalls when there is |
116 | CPU stalls when there is a grace period in progress. No grace period, | 121 | a grace period in progress. No grace period, no CPU stall warnings. |
117 | no CPU stall warnings. | ||
118 | 122 | ||
119 | To diagnose the cause of the stall, inspect the stack traces. | 123 | To diagnose the cause of the stall, inspect the stack traces. |
120 | The offending function will usually be near the top of the stack. | 124 | The offending function will usually be near the top of the stack. |
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 783d6c134d3f..d67068d0d2b9 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -61,11 +61,24 @@ nreaders This is the number of RCU reading threads supported. | |||
61 | To properly exercise RCU implementations with preemptible | 61 | To properly exercise RCU implementations with preemptible |
62 | read-side critical sections. | 62 | read-side critical sections. |
63 | 63 | ||
64 | onoff_interval | ||
65 | The number of seconds between each attempt to execute a | ||
66 | randomly selected CPU-hotplug operation. Defaults to | ||
67 | zero, which disables CPU hotplugging. In HOTPLUG_CPU=n | ||
68 | kernels, rcutorture will silently refuse to do any | ||
69 | CPU-hotplug operations regardless of what value is | ||
70 | specified for onoff_interval. | ||
71 | |||
64 | shuffle_interval | 72 | shuffle_interval |
65 | The number of seconds to keep the test threads affinitied | 73 | The number of seconds to keep the test threads affinitied |
66 | to a particular subset of the CPUs, defaults to 3 seconds. | 74 | to a particular subset of the CPUs, defaults to 3 seconds. |
67 | Used in conjunction with test_no_idle_hz. | 75 | Used in conjunction with test_no_idle_hz. |
68 | 76 | ||
77 | shutdown_secs The number of seconds to run the test before terminating | ||
78 | the test and powering off the system. The default is | ||
79 | zero, which disables test termination and system shutdown. | ||
80 | This capability is useful for automated testing. | ||
81 | |||
69 | stat_interval The number of seconds between output of torture | 82 | stat_interval The number of seconds between output of torture |
70 | statistics (via printk()). Regardless of the interval, | 83 | statistics (via printk()). Regardless of the interval, |
71 | statistics are printed when the module is unloaded. | 84 | statistics are printed when the module is unloaded. |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index aaf65f6c6cd7..49587abfc2f7 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented | |||
105 | or one greater than the interrupt-nesting depth otherwise. | 105 | or one greater than the interrupt-nesting depth otherwise. |
106 | The number after the second "/" is the NMI nesting depth. | 106 | The number after the second "/" is the NMI nesting depth. |
107 | 107 | ||
108 | This field is displayed only for CONFIG_NO_HZ kernels. | ||
109 | |||
110 | o "df" is the number of times that some other CPU has forced a | 108 | o "df" is the number of times that some other CPU has forced a |
111 | quiescent state on behalf of this CPU due to this CPU being in | 109 | quiescent state on behalf of this CPU due to this CPU being in |
112 | dynticks-idle state. | 110 | dynticks-idle state. |
113 | 111 | ||
114 | This field is displayed only for CONFIG_NO_HZ kernels. | ||
115 | |||
116 | o "of" is the number of times that some other CPU has forced a | 112 | o "of" is the number of times that some other CPU has forced a |
117 | quiescent state on behalf of this CPU due to this CPU being | 113 | quiescent state on behalf of this CPU due to this CPU being |
118 | offline. In a perfect world, this might never happen, but it | 114 | offline. In a perfect world, this might never happen, but it |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 6ef692667e2f..6bbe8dcdc3da 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -4,6 +4,7 @@ to start learning about RCU: | |||
4 | 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ | 4 | 1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/ |
5 | 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ | 5 | 2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/ |
6 | 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ | 6 | 3. RCU part 3: the RCU API http://lwn.net/Articles/264090/ |
7 | 4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/ | ||
7 | 8 | ||
8 | 9 | ||
9 | What is RCU? | 10 | What is RCU? |
@@ -834,6 +835,8 @@ SRCU: Critical sections Grace period Barrier | |||
834 | 835 | ||
835 | srcu_read_lock synchronize_srcu N/A | 836 | srcu_read_lock synchronize_srcu N/A |
836 | srcu_read_unlock synchronize_srcu_expedited | 837 | srcu_read_unlock synchronize_srcu_expedited |
838 | srcu_read_lock_raw | ||
839 | srcu_read_unlock_raw | ||
837 | srcu_dereference | 840 | srcu_dereference |
838 | 841 | ||
839 | SRCU: Initialization/cleanup | 842 | SRCU: Initialization/cleanup |
@@ -855,27 +858,33 @@ list can be helpful: | |||
855 | 858 | ||
856 | a. Will readers need to block? If so, you need SRCU. | 859 | a. Will readers need to block? If so, you need SRCU. |
857 | 860 | ||
858 | b. What about the -rt patchset? If readers would need to block | 861 | b. Is it necessary to start a read-side critical section in a |
862 | hardirq handler or exception handler, and then to complete | ||
863 | this read-side critical section in the task that was | ||
864 | interrupted? If so, you need SRCU's srcu_read_lock_raw() and | ||
865 | srcu_read_unlock_raw() primitives. | ||
866 | |||
867 | c. What about the -rt patchset? If readers would need to block | ||
859 | in a non-rt kernel, you need SRCU. If readers would block | 868 | in a non-rt kernel, you need SRCU. If readers would block |
860 | in a -rt kernel, but not in a non-rt kernel, SRCU is not | 869 | in a -rt kernel, but not in a non-rt kernel, SRCU is not |
861 | necessary. | 870 | necessary. |
862 | 871 | ||
863 | c. Do you need to treat NMI handlers, hardirq handlers, | 872 | d. Do you need to treat NMI handlers, hardirq handlers, |
864 | and code segments with preemption disabled (whether | 873 | and code segments with preemption disabled (whether |
865 | via preempt_disable(), local_irq_save(), local_bh_disable(), | 874 | via preempt_disable(), local_irq_save(), local_bh_disable(), |
866 | or some other mechanism) as if they were explicit RCU readers? | 875 | or some other mechanism) as if they were explicit RCU readers? |
867 | If so, you need RCU-sched. | 876 | If so, you need RCU-sched. |
868 | 877 | ||
869 | d. Do you need RCU grace periods to complete even in the face | 878 | e. Do you need RCU grace periods to complete even in the face |
870 | of softirq monopolization of one or more of the CPUs? For | 879 | of softirq monopolization of one or more of the CPUs? For |
871 | example, is your code subject to network-based denial-of-service | 880 | example, is your code subject to network-based denial-of-service |
872 | attacks? If so, you need RCU-bh. | 881 | attacks? If so, you need RCU-bh. |
873 | 882 | ||
874 | e. Is your workload too update-intensive for normal use of | 883 | f. Is your workload too update-intensive for normal use of |
875 | RCU, but inappropriate for other synchronization mechanisms? | 884 | RCU, but inappropriate for other synchronization mechanisms? |
876 | If so, consider SLAB_DESTROY_BY_RCU. But please be careful! | 885 | If so, consider SLAB_DESTROY_BY_RCU. But please be careful! |
877 | 886 | ||
878 | f. Otherwise, use RCU. | 887 | g. Otherwise, use RCU. |
879 | 888 | ||
880 | Of course, this all assumes that you have determined that RCU is in fact | 889 | Of course, this all assumes that you have determined that RCU is in fact |
881 | the right tool for your job. | 890 | the right tool for your job. |
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt index 771d48d3b335..208a2d465b92 100644 --- a/Documentation/arm/memory.txt +++ b/Documentation/arm/memory.txt | |||
@@ -51,15 +51,14 @@ ffc00000 ffefffff DMA memory mapping region. Memory returned | |||
51 | ff000000 ffbfffff Reserved for future expansion of DMA | 51 | ff000000 ffbfffff Reserved for future expansion of DMA |
52 | mapping region. | 52 | mapping region. |
53 | 53 | ||
54 | VMALLOC_END feffffff Free for platform use, recommended. | ||
55 | VMALLOC_END must be aligned to a 2MB | ||
56 | boundary. | ||
57 | |||
58 | VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space. | 54 | VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space. |
59 | Memory returned by vmalloc/ioremap will | 55 | Memory returned by vmalloc/ioremap will |
60 | be dynamically placed in this region. | 56 | be dynamically placed in this region. |
61 | VMALLOC_START may be based upon the value | 57 | Machine specific static mappings are also |
62 | of the high_memory variable. | 58 | located here through iotable_init(). |
59 | VMALLOC_START is based upon the value | ||
60 | of the high_memory variable, and VMALLOC_END | ||
61 | is equal to 0xff000000. | ||
63 | 62 | ||
64 | PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region. | 63 | PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region. |
65 | This maps the platforms RAM, and typically | 64 | This maps the platforms RAM, and typically |
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index 3bd585b44927..27f2b21a9d5c 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt | |||
@@ -84,6 +84,93 @@ compiler optimizes the section accessing atomic_t variables. | |||
84 | 84 | ||
85 | *** YOU HAVE BEEN WARNED! *** | 85 | *** YOU HAVE BEEN WARNED! *** |
86 | 86 | ||
87 | Properly aligned pointers, longs, ints, and chars (and unsigned | ||
88 | equivalents) may be atomically loaded from and stored to in the same | ||
89 | sense as described for atomic_read() and atomic_set(). The ACCESS_ONCE() | ||
90 | macro should be used to prevent the compiler from using optimizations | ||
91 | that might otherwise optimize accesses out of existence on the one hand, | ||
92 | or that might create unsolicited accesses on the other. | ||
93 | |||
94 | For example consider the following code: | ||
95 | |||
96 | while (a > 0) | ||
97 | do_something(); | ||
98 | |||
99 | If the compiler can prove that do_something() does not store to the | ||
100 | variable a, then the compiler is within its rights transforming this to | ||
101 | the following: | ||
102 | |||
103 | tmp = a; | ||
104 | if (tmp > 0) | ||
105 | for (;;) | ||
106 | do_something(); | ||
107 | |||
108 | If you don't want the compiler to do this (and you probably don't), then | ||
109 | you should use something like the following: | ||
110 | |||
111 | while (ACCESS_ONCE(a) > 0) | ||
112 | do_something(); | ||
113 | |||
114 | Alternatively, you could place a barrier() call in the loop. | ||
115 | |||
116 | For another example, consider the following code: | ||
117 | |||
118 | tmp_a = a; | ||
119 | do_something_with(tmp_a); | ||
120 | do_something_else_with(tmp_a); | ||
121 | |||
122 | If the compiler can prove that do_something_with() does not store to the | ||
123 | variable a, then the compiler is within its rights to manufacture an | ||
124 | additional load as follows: | ||
125 | |||
126 | tmp_a = a; | ||
127 | do_something_with(tmp_a); | ||
128 | tmp_a = a; | ||
129 | do_something_else_with(tmp_a); | ||
130 | |||
131 | This could fatally confuse your code if it expected the same value | ||
132 | to be passed to do_something_with() and do_something_else_with(). | ||
133 | |||
134 | The compiler would be likely to manufacture this additional load if | ||
135 | do_something_with() was an inline function that made very heavy use | ||
136 | of registers: reloading from variable a could save a flush to the | ||
137 | stack and later reload. To prevent the compiler from attacking your | ||
138 | code in this manner, write the following: | ||
139 | |||
140 | tmp_a = ACCESS_ONCE(a); | ||
141 | do_something_with(tmp_a); | ||
142 | do_something_else_with(tmp_a); | ||
143 | |||
144 | For a final example, consider the following code, assuming that the | ||
145 | variable a is set at boot time before the second CPU is brought online | ||
146 | and never changed later, so that memory barriers are not needed: | ||
147 | |||
148 | if (a) | ||
149 | b = 9; | ||
150 | else | ||
151 | b = 42; | ||
152 | |||
153 | The compiler is within its rights to manufacture an additional store | ||
154 | by transforming the above code into the following: | ||
155 | |||
156 | b = 42; | ||
157 | if (a) | ||
158 | b = 9; | ||
159 | |||
160 | This could come as a fatal surprise to other code running concurrently | ||
161 | that expected b to never have the value 42 if a was non-zero. To prevent | ||
162 | the compiler from doing this, write something like: | ||
163 | |||
164 | if (a) | ||
165 | ACCESS_ONCE(b) = 9; | ||
166 | else | ||
167 | ACCESS_ONCE(b) = 42; | ||
168 | |||
169 | Don't even -think- about doing this without proper use of memory barriers, | ||
170 | locks, or atomic operations if variable a can change at runtime! | ||
171 | |||
172 | *** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! *** | ||
173 | |||
87 | Now, we move onto the atomic operation interfaces typically implemented with | 174 | Now, we move onto the atomic operation interfaces typically implemented with |
88 | the help of assembly code. | 175 | the help of assembly code. |
89 | 176 | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 9c452ef2328c..a7c96ae5557c 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -594,53 +594,44 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be | |||
594 | called multiple times against a cgroup. | 594 | called multiple times against a cgroup. |
595 | 595 | ||
596 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 596 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
597 | struct task_struct *task) | 597 | struct cgroup_taskset *tset) |
598 | (cgroup_mutex held by caller) | 598 | (cgroup_mutex held by caller) |
599 | 599 | ||
600 | Called prior to moving a task into a cgroup; if the subsystem | 600 | Called prior to moving one or more tasks into a cgroup; if the |
601 | returns an error, this will abort the attach operation. If a NULL | 601 | subsystem returns an error, this will abort the attach operation. |
602 | task is passed, then a successful result indicates that *any* | 602 | @tset contains the tasks to be attached and is guaranteed to have at |
603 | unspecified task can be moved into the cgroup. Note that this isn't | 603 | least one task in it. |
604 | called on a fork. If this method returns 0 (success) then this should | 604 | |
605 | remain valid while the caller holds cgroup_mutex and it is ensured that either | 605 | If there are multiple tasks in the taskset, then: |
606 | - it's guaranteed that all are from the same thread group | ||
607 | - @tset contains all tasks from the thread group whether or not | ||
608 | they're switching cgroups | ||
609 | - the first task is the leader | ||
610 | |||
611 | Each @tset entry also contains the task's old cgroup and tasks which | ||
612 | aren't switching cgroup can be skipped easily using the | ||
613 | cgroup_taskset_for_each() iterator. Note that this isn't called on a | ||
614 | fork. If this method returns 0 (success) then this should remain valid | ||
615 | while the caller holds cgroup_mutex and it is ensured that either | ||
606 | attach() or cancel_attach() will be called in future. | 616 | attach() or cancel_attach() will be called in future. |
607 | 617 | ||
608 | int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk); | ||
609 | (cgroup_mutex held by caller) | ||
610 | |||
611 | As can_attach, but for operations that must be run once per task to be | ||
612 | attached (possibly many when using cgroup_attach_proc). Called after | ||
613 | can_attach. | ||
614 | |||
615 | void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 618 | void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
616 | struct task_struct *task, bool threadgroup) | 619 | struct cgroup_taskset *tset) |
617 | (cgroup_mutex held by caller) | 620 | (cgroup_mutex held by caller) |
618 | 621 | ||
619 | Called when a task attach operation has failed after can_attach() has succeeded. | 622 | Called when a task attach operation has failed after can_attach() has succeeded. |
620 | A subsystem whose can_attach() has some side-effects should provide this | 623 | A subsystem whose can_attach() has some side-effects should provide this |
621 | function, so that the subsystem can implement a rollback. If not, not necessary. | 624 | function, so that the subsystem can implement a rollback. If not, not necessary. |
622 | This will be called only about subsystems whose can_attach() operation have | 625 | This will be called only about subsystems whose can_attach() operation have |
623 | succeeded. | 626 | succeeded. The parameters are identical to can_attach(). |
624 | |||
625 | void pre_attach(struct cgroup *cgrp); | ||
626 | (cgroup_mutex held by caller) | ||
627 | |||
628 | For any non-per-thread attachment work that needs to happen before | ||
629 | attach_task. Needed by cpuset. | ||
630 | 627 | ||
631 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 628 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
632 | struct cgroup *old_cgrp, struct task_struct *task) | 629 | struct cgroup_taskset *tset) |
633 | (cgroup_mutex held by caller) | 630 | (cgroup_mutex held by caller) |
634 | 631 | ||
635 | Called after the task has been attached to the cgroup, to allow any | 632 | Called after the task has been attached to the cgroup, to allow any |
636 | post-attachment activity that requires memory allocations or blocking. | 633 | post-attachment activity that requires memory allocations or blocking. |
637 | 634 | The parameters are identical to can_attach(). | |
638 | void attach_task(struct cgroup *cgrp, struct task_struct *tsk); | ||
639 | (cgroup_mutex held by caller) | ||
640 | |||
641 | As attach, but for operations that must be run once per task to be attached, | ||
642 | like can_attach_task. Called before attach. Currently does not support any | ||
643 | subsystem that might need the old_cgrp for every thread in the group. | ||
644 | 635 | ||
645 | void fork(struct cgroup_subsys *ss, struct task_struct *task) | 636 | void fork(struct cgroup_subsys *ss, struct task_struct *task) |
646 | 637 | ||
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index cc0ebc5241b3..4c95c0034a4b 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -44,8 +44,8 @@ Features: | |||
44 | - oom-killer disable knob and oom-notifier | 44 | - oom-killer disable knob and oom-notifier |
45 | - Root cgroup has no limit controls. | 45 | - Root cgroup has no limit controls. |
46 | 46 | ||
47 | Kernel memory and Hugepages are not under control yet. We just manage | 47 | Kernel memory support is work in progress, and the current version provides |
48 | pages on LRU. To add more controls, we have to take care of performance. | 48 | basic functionality. (See Section 2.7) |
49 | 49 | ||
50 | Brief summary of control files. | 50 | Brief summary of control files. |
51 | 51 | ||
@@ -61,7 +61,7 @@ Brief summary of control files. | |||
61 | memory.failcnt # show the number of memory usage hits limits | 61 | memory.failcnt # show the number of memory usage hits limits |
62 | memory.memsw.failcnt # show the number of memory+Swap hits limits | 62 | memory.memsw.failcnt # show the number of memory+Swap hits limits |
63 | memory.max_usage_in_bytes # show max memory usage recorded | 63 | memory.max_usage_in_bytes # show max memory usage recorded |
64 | memory.memsw.usage_in_bytes # show max memory+Swap usage recorded | 64 | memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded |
65 | memory.soft_limit_in_bytes # set/show soft limit of memory usage | 65 | memory.soft_limit_in_bytes # set/show soft limit of memory usage |
66 | memory.stat # show various statistics | 66 | memory.stat # show various statistics |
67 | memory.use_hierarchy # set/show hierarchical account enabled | 67 | memory.use_hierarchy # set/show hierarchical account enabled |
@@ -72,6 +72,9 @@ Brief summary of control files. | |||
72 | memory.oom_control # set/show oom controls. | 72 | memory.oom_control # set/show oom controls. |
73 | memory.numa_stat # show the number of memory usage per numa node | 73 | memory.numa_stat # show the number of memory usage per numa node |
74 | 74 | ||
75 | memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory | ||
76 | memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation | ||
77 | |||
75 | 1. History | 78 | 1. History |
76 | 79 | ||
77 | The memory controller has a long history. A request for comments for the memory | 80 | The memory controller has a long history. A request for comments for the memory |
@@ -255,6 +258,27 @@ When oom event notifier is registered, event will be delivered. | |||
255 | per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by | 258 | per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by |
256 | zone->lru_lock, it has no lock of its own. | 259 | zone->lru_lock, it has no lock of its own. |
257 | 260 | ||
261 | 2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM) | ||
262 | |||
263 | With the Kernel memory extension, the Memory Controller is able to limit | ||
264 | the amount of kernel memory used by the system. Kernel memory is fundamentally | ||
265 | different than user memory, since it can't be swapped out, which makes it | ||
266 | possible to DoS the system by consuming too much of this precious resource. | ||
267 | |||
268 | Kernel memory limits are not imposed for the root cgroup. Usage for the root | ||
269 | cgroup may or may not be accounted. | ||
270 | |||
271 | Currently no soft limit is implemented for kernel memory. It is future work | ||
272 | to trigger slab reclaim when those limits are reached. | ||
273 | |||
274 | 2.7.1 Current Kernel Memory resources accounted | ||
275 | |||
276 | * sockets memory pressure: some sockets protocols have memory pressure | ||
277 | thresholds. The Memory Controller allows them to be controlled individually | ||
278 | per cgroup, instead of globally. | ||
279 | |||
280 | * tcp memory pressure: sockets memory pressure for the tcp protocol. | ||
281 | |||
258 | 3. User Interface | 282 | 3. User Interface |
259 | 283 | ||
260 | 0. Configuration | 284 | 0. Configuration |
@@ -386,8 +410,11 @@ memory.stat file includes following statistics | |||
386 | cache - # of bytes of page cache memory. | 410 | cache - # of bytes of page cache memory. |
387 | rss - # of bytes of anonymous and swap cache memory. | 411 | rss - # of bytes of anonymous and swap cache memory. |
388 | mapped_file - # of bytes of mapped file (includes tmpfs/shmem) | 412 | mapped_file - # of bytes of mapped file (includes tmpfs/shmem) |
389 | pgpgin - # of pages paged in (equivalent to # of charging events). | 413 | pgpgin - # of charging events to the memory cgroup. The charging |
390 | pgpgout - # of pages paged out (equivalent to # of uncharging events). | 414 | event happens each time a page is accounted as either mapped |
415 | anon page(RSS) or cache page(Page Cache) to the cgroup. | ||
416 | pgpgout - # of uncharging events to the memory cgroup. The uncharging | ||
417 | event happens each time a page is unaccounted from the cgroup. | ||
391 | swap - # of bytes of swap usage | 418 | swap - # of bytes of swap usage |
392 | inactive_anon - # of bytes of anonymous memory and swap cache memory on | 419 | inactive_anon - # of bytes of anonymous memory and swap cache memory on |
393 | LRU list. | 420 | LRU list. |
diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroups/net_prio.txt new file mode 100644 index 000000000000..01b322635591 --- /dev/null +++ b/Documentation/cgroups/net_prio.txt | |||
@@ -0,0 +1,53 @@ | |||
1 | Network priority cgroup | ||
2 | ------------------------- | ||
3 | |||
4 | The Network priority cgroup provides an interface to allow an administrator to | ||
5 | dynamically set the priority of network traffic generated by various | ||
6 | applications. | ||
7 | |||
8 | Nominally, an application would set the priority of its traffic via the | ||
9 | SO_PRIORITY socket option. This, however, is not always possible because: | ||
10 | |||
11 | 1) The application may not have been coded to set this value | ||
12 | 2) The priority of application traffic is often a site-specific administrative | ||
13 | decision rather than an application defined one. | ||
14 | |||
15 | This cgroup allows an administrator to assign a process to a group which defines | ||
16 | the priority of egress traffic on a given interface. Network priority groups can | ||
17 | be created by first mounting the cgroup filesystem. | ||
18 | |||
19 | # mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio | ||
20 | |||
21 | With the above step, the initial group acting as the parent accounting group | ||
22 | becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in | ||
23 | the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup. | ||
24 | |||
25 | Each net_prio cgroup contains two files that are subsystem specific | ||
26 | |||
27 | net_prio.prioidx | ||
28 | This file is read-only, and is simply informative. It contains a unique integer | ||
29 | value that the kernel uses as an internal representation of this cgroup. | ||
30 | |||
31 | net_prio.ifpriomap | ||
32 | This file contains a map of the priorities assigned to traffic originating from | ||
33 | processes in this group and egressing the system on various interfaces. It | ||
34 | contains a list of tuples in the form <ifname priority>. Contents of this file | ||
35 | can be modified by echoing a string into the file using the same tuple format. | ||
36 | For example: | ||
37 | |||
38 | echo "eth0 5" > /sys/fs/cgroup/net_prio/iscsi/net_prio.ifpriomap | ||
39 | |||
40 | This command would force any traffic originating from processes belonging to the | ||
41 | iscsi net_prio cgroup and egressing on interface eth0 to have the priority of | ||
42 | said traffic set to the value 5. The parent accounting group also has a | ||
43 | writeable 'net_prio.ifpriomap' file that can be used to set a system default | ||
44 | priority. | ||
45 | |||
46 | Priorities are set immediately prior to queueing a frame to the device | ||
47 | queueing discipline (qdisc) so priorities will be assigned prior to the hardware | ||
48 | queue selection being made. | ||
49 | |||
50 | One usage for the net_prio cgroup is with mqprio qdisc allowing application | ||
51 | traffic to be steered to hardware/driver based traffic classes. These mappings | ||
52 | can then be managed by administrators or other networking protocols such as | ||
53 | DCBX. | ||
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index d221781dabaa..c7a2eb8450c2 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt | |||
@@ -127,7 +127,7 @@ in the bash (as said, 1000 is default), do: | |||
127 | echo $(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ | 127 | echo $(($(cat cpuinfo_transition_latency) * 750 / 1000)) \ |
128 | >ondemand/sampling_rate | 128 | >ondemand/sampling_rate |
129 | 129 | ||
130 | show_sampling_rate_min: | 130 | sampling_rate_min: |
131 | The sampling rate is limited by the HW transition latency: | 131 | The sampling rate is limited by the HW transition latency: |
132 | transition_latency * 100 | 132 | transition_latency * 100 |
133 | Or by kernel restrictions: | 133 | Or by kernel restrictions: |
@@ -140,8 +140,6 @@ HZ=100: min=200000us (200ms) | |||
140 | The highest value of kernel and HW latency restrictions is shown and | 140 | The highest value of kernel and HW latency restrictions is shown and |
141 | used as the minimum sampling rate. | 141 | used as the minimum sampling rate. |
142 | 142 | ||
143 | show_sampling_rate_max: THIS INTERFACE IS DEPRECATED, DON'T USE IT. | ||
144 | |||
145 | up_threshold: defines what the average CPU usage between the samplings | 143 | up_threshold: defines what the average CPU usage between the samplings |
146 | of 'sampling_rate' needs to be for the kernel to make a decision on | 144 | of 'sampling_rate' needs to be for the kernel to make a decision on |
147 | whether it should increase the frequency. For example when it is set | 145 | whether it should increase the frequency. For example when it is set |
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting index 903a2546f138..8a48c9b62864 100644 --- a/Documentation/development-process/5.Posting +++ b/Documentation/development-process/5.Posting | |||
@@ -271,10 +271,10 @@ copies should go to: | |||
271 | the linux-kernel list. | 271 | the linux-kernel list. |
272 | 272 | ||
273 | - If you are fixing a bug, think about whether the fix should go into the | 273 | - If you are fixing a bug, think about whether the fix should go into the |
274 | next stable update. If so, stable@kernel.org should get a copy of the | 274 | next stable update. If so, stable@vger.kernel.org should get a copy of |
275 | patch. Also add a "Cc: stable@kernel.org" to the tags within the patch | 275 | the patch. Also add a "Cc: stable@vger.kernel.org" to the tags within |
276 | itself; that will cause the stable team to get a notification when your | 276 | the patch itself; that will cause the stable team to get a notification |
277 | fix goes into the mainline. | 277 | when your fix goes into the mainline. |
278 | 278 | ||
279 | When selecting recipients for a patch, it is good to have an idea of who | 279 | When selecting recipients for a patch, it is good to have an idea of who |
280 | you think will eventually accept the patch and get it merged. While it | 280 | you think will eventually accept the patch and get it merged. While it |
diff --git a/Documentation/devices.txt b/Documentation/devices.txt index eccffe715229..cec8864ce4e8 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt | |||
@@ -379,7 +379,7 @@ Your cooperation is appreciated. | |||
379 | 162 = /dev/smbus System Management Bus | 379 | 162 = /dev/smbus System Management Bus |
380 | 163 = /dev/lik Logitech Internet Keyboard | 380 | 163 = /dev/lik Logitech Internet Keyboard |
381 | 164 = /dev/ipmo Intel Intelligent Platform Management | 381 | 164 = /dev/ipmo Intel Intelligent Platform Management |
382 | 165 = /dev/vmmon VMWare virtual machine monitor | 382 | 165 = /dev/vmmon VMware virtual machine monitor |
383 | 166 = /dev/i2o/ctl I2O configuration manager | 383 | 166 = /dev/i2o/ctl I2O configuration manager |
384 | 167 = /dev/specialix_sxctl Specialix serial control | 384 | 167 = /dev/specialix_sxctl Specialix serial control |
385 | 168 = /dev/tcldrv Technology Concepts serial control | 385 | 168 = /dev/tcldrv Technology Concepts serial control |
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt index c9848ad0e2e3..54bdddadf1cf 100644 --- a/Documentation/devicetree/bindings/arm/fsl.txt +++ b/Documentation/devicetree/bindings/arm/fsl.txt | |||
@@ -21,6 +21,10 @@ i.MX53 Smart Mobile Reference Design Board | |||
21 | Required root node properties: | 21 | Required root node properties: |
22 | - compatible = "fsl,imx53-smd", "fsl,imx53"; | 22 | - compatible = "fsl,imx53-smd", "fsl,imx53"; |
23 | 23 | ||
24 | i.MX6 Quad SABRE Automotive Board | 24 | i.MX6 Quad Armadillo2 Board |
25 | Required root node properties: | 25 | Required root node properties: |
26 | - compatible = "fsl,imx6q-sabreauto", "fsl,imx6q"; | 26 | - compatible = "fsl,imx6q-arm2", "fsl,imx6q"; |
27 | |||
28 | i.MX6 Quad SABRE Lite Board | ||
29 | Required root node properties: | ||
30 | - compatible = "fsl,imx6q-sabrelite", "fsl,imx6q"; | ||
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt index 52916b4aa1fe..9b4b82a721b6 100644 --- a/Documentation/devicetree/bindings/arm/gic.txt +++ b/Documentation/devicetree/bindings/arm/gic.txt | |||
@@ -42,6 +42,10 @@ Optional | |||
42 | - interrupts : Interrupt source of the parent interrupt controller. Only | 42 | - interrupts : Interrupt source of the parent interrupt controller. Only |
43 | present on secondary GICs. | 43 | present on secondary GICs. |
44 | 44 | ||
45 | - cpu-offset : per-cpu offset within the distributor and cpu interface | ||
46 | regions, used when the GIC doesn't have banked registers. The offset is | ||
47 | cpu-offset * cpu-nr. | ||
48 | |||
45 | Example: | 49 | Example: |
46 | 50 | ||
47 | intc: interrupt-controller@fff11000 { | 51 | intc: interrupt-controller@fff11000 { |
diff --git a/Documentation/devicetree/bindings/arm/insignal-boards.txt b/Documentation/devicetree/bindings/arm/insignal-boards.txt new file mode 100644 index 000000000000..524c3dc5d808 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/insignal-boards.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | * Insignal's Exynos4210 based Origen evaluation board | ||
2 | |||
3 | Origen low-cost evaluation board is based on Samsung's Exynos4210 SoC. | ||
4 | |||
5 | Required root node properties: | ||
6 | - compatible = should be one or more of the following. | ||
7 | (a) "insignal,origen" - for Insignal's Origen eval board. | ||
8 | (b) "samsung,exynos4210" - for boards based on Exynos4210 SoC. | ||
diff --git a/Documentation/devicetree/bindings/arm/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung-boards.txt new file mode 100644 index 000000000000..0bf68be56fd1 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/samsung-boards.txt | |||
@@ -0,0 +1,8 @@ | |||
1 | * Samsung's Exynos4210 based SMDKV310 evaluation board | ||
2 | |||
3 | SMDKV310 evaluation board is based on Samsung's Exynos4210 SoC. | ||
4 | |||
5 | Required root node properties: | ||
6 | - compatible = should be one or more of the following. | ||
7 | (a) "samsung,smdkv310" - for Samsung's SMDKV310 eval board. | ||
8 | (b) "samsung,exynos4210" - for boards based on Exynos4210 SoC. | ||
diff --git a/Documentation/devicetree/bindings/arm/tegra.txt b/Documentation/devicetree/bindings/arm/tegra.txt new file mode 100644 index 000000000000..6e69d2e5e766 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | NVIDIA Tegra device tree bindings | ||
2 | ------------------------------------------- | ||
3 | |||
4 | Boards with the tegra20 SoC shall have the following properties: | ||
5 | |||
6 | Required root node property: | ||
7 | |||
8 | compatible = "nvidia,tegra20"; | ||
9 | |||
10 | Boards with the tegra30 SoC shall have the following properties: | ||
11 | |||
12 | Required root node property: | ||
13 | |||
14 | compatible = "nvidia,tegra30"; | ||
diff --git a/Documentation/devicetree/bindings/arm/vic.txt b/Documentation/devicetree/bindings/arm/vic.txt new file mode 100644 index 000000000000..266716b23437 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/vic.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | * ARM Vectored Interrupt Controller | ||
2 | |||
3 | One or more Vectored Interrupt Controllers (VICs) can be connected in an ARM | ||
4 | system for interrupt routing. For multiple controllers they can either be | ||
5 | nested or have the outputs wire-OR'd together. | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible : should be one of | ||
10 | "arm,pl190-vic" | ||
11 | "arm,pl192-vic" | ||
12 | - interrupt-controller : Identifies the node as an interrupt controller | ||
13 | - #interrupt-cells : The number of cells to define the interrupts. Must be 1 as | ||
14 | the VIC has no configuration options for interrupt sources. The cell is a u32 | ||
15 | and defines the interrupt number. | ||
16 | - reg : The register bank for the VIC. | ||
17 | |||
18 | Optional properties: | ||
19 | |||
20 | - interrupts : Interrupt source for parent controllers if the VIC is nested. | ||
21 | |||
22 | Example: | ||
23 | |||
24 | vic0: interrupt-controller@60000 { | ||
25 | compatible = "arm,pl192-vic"; | ||
26 | interrupt-controller; | ||
27 | #interrupt-cells = <1>; | ||
28 | reg = <0x60000 0x1000>; | ||
29 | }; | ||
diff --git a/Documentation/devicetree/bindings/c6x/clocks.txt b/Documentation/devicetree/bindings/c6x/clocks.txt new file mode 100644 index 000000000000..a04f5fd30122 --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/clocks.txt | |||
@@ -0,0 +1,40 @@ | |||
1 | C6X PLL Clock Controllers | ||
2 | ------------------------- | ||
3 | |||
4 | This is a first-cut support for the SoC clock controllers. This is still | ||
5 | under development and will probably change as the common device tree | ||
6 | clock support is added to the kernel. | ||
7 | |||
8 | Required properties: | ||
9 | |||
10 | - compatible: "ti,c64x+pll" | ||
11 | May also have SoC-specific value to support SoC-specific initialization | ||
12 | in the driver. One of: | ||
13 | "ti,c6455-pll" | ||
14 | "ti,c6457-pll" | ||
15 | "ti,c6472-pll" | ||
16 | "ti,c6474-pll" | ||
17 | |||
18 | - reg: base address and size of register area | ||
19 | - clock-frequency: input clock frequency in Hz | ||
20 | |||
21 | |||
22 | Optional properties: | ||
23 | |||
24 | - ti,c64x+pll-bypass-delay: CPU cycles to delay when entering bypass mode | ||
25 | |||
26 | - ti,c64x+pll-reset-delay: CPU cycles to delay after PLL reset | ||
27 | |||
28 | - ti,c64x+pll-lock-delay: CPU cycles to delay after PLL frequency change | ||
29 | |||
30 | Example: | ||
31 | |||
32 | clock-controller@29a0000 { | ||
33 | compatible = "ti,c6472-pll", "ti,c64x+pll"; | ||
34 | reg = <0x029a0000 0x200>; | ||
35 | clock-frequency = <25000000>; | ||
36 | |||
37 | ti,c64x+pll-bypass-delay = <200>; | ||
38 | ti,c64x+pll-reset-delay = <12000>; | ||
39 | ti,c64x+pll-lock-delay = <80000>; | ||
40 | }; | ||
diff --git a/Documentation/devicetree/bindings/c6x/dscr.txt b/Documentation/devicetree/bindings/c6x/dscr.txt new file mode 100644 index 000000000000..d847758f2b20 --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/dscr.txt | |||
@@ -0,0 +1,127 @@ | |||
1 | Device State Configuration Registers | ||
2 | ------------------------------------ | ||
3 | |||
4 | TI C6X SoCs contain a region of miscellaneous registers which provide various | ||
5 | function for SoC control or status. Details vary considerably from SoC | ||
6 | to SoC with no two being alike. | ||
7 | |||
8 | In general, the Device State Configuration Registers (DSCR) will provide one or | ||
9 | more configuration registers often protected by a lock register where one or | ||
10 | more key values must be written to a lock register in order to unlock the | ||
11 | configuration register for writes. These configuration registers may be used to | ||
12 | enable (and disable in some cases) SoC pin drivers, select peripheral clock | ||
13 | sources (internal or pin), etc. In some cases, a configuration register is | ||
14 | write once or the individual bits are write once. In addition to device config, | ||
15 | the DSCR block may provide registers which are used to reset peripherals, | ||
16 | provide device ID information, provide ethernet MAC addresses, as well as other | ||
17 | miscellaneous functions. | ||
18 | |||
19 | For device state control (enable/disable), each device control is assigned an | ||
20 | id which is used by individual device drivers to control the state as needed. | ||
21 | |||
22 | Required properties: | ||
23 | |||
24 | - compatible: must be "ti,c64x+dscr" | ||
25 | - reg: register area base and size | ||
26 | |||
27 | Optional properties: | ||
28 | |||
29 | NOTE: These are optional in that not all SoCs will have all properties. For | ||
30 | SoCs which do support a given property, leaving the property out of the | ||
31 | device tree will result in reduced functionality or possibly driver | ||
32 | failure. | ||
33 | |||
34 | - ti,dscr-devstat | ||
35 | offset of the devstat register | ||
36 | |||
37 | - ti,dscr-silicon-rev | ||
38 | offset, start bit, and bitsize of silicon revision field | ||
39 | |||
40 | - ti,dscr-rmii-resets | ||
41 | offset and bitmask of RMII reset field. May have multiple tuples if more | ||
42 | than one ethernet port is available. | ||
43 | |||
44 | - ti,dscr-locked-regs | ||
45 | possibly multiple tuples describing registers which are write protected by | ||
46 | a lock register. Each tuple consists of the register offset, lock register | ||
47 | offset, and the key value used to unlock the register. | ||
48 | |||
49 | - ti,dscr-kick-regs | ||
50 | offset and key values of two "kick" registers used to write protect other | ||
51 | registers in DSCR. On SoCs using kick registers, the first key must be | ||
52 | written to the first kick register and the second key must be written to | ||
53 | the second register before other registers in the area are write-enabled. | ||
54 | |||
55 | - ti,dscr-mac-fuse-regs | ||
56 | MAC addresses are contained in two registers. Each element of a MAC address | ||
57 | is contained in a single byte. This property has two tuples. Each tuple has | ||
58 | a register offset and four cells representing bytes in the register from | ||
59 | most significant to least. The value of these four cells is the MAC byte | ||
60 | index (1-6) of the byte within the register. A value of 0 means the byte | ||
61 | is unused in the MAC address. | ||
62 | |||
63 | - ti,dscr-devstate-ctl-regs | ||
64 | This property describes the bitfields used to control the state of devices. | ||
65 | Each tuple describes a range of identical bitfields used to control one or | ||
66 | more devices (one bitfield per device). The layout of each tuple is: | ||
67 | |||
68 | start_id num_ids reg enable disable start_bit nbits | ||
69 | |||
70 | Where: | ||
71 | start_id is device id for the first device control in the range | ||
72 | num_ids is the number of device controls in the range | ||
73 | reg is the offset of the register holding the control bits | ||
74 | enable is the value to enable a device | ||
75 | disable is the value to disable a device (0xffffffff if cannot disable) | ||
76 | start_bit is the bit number of the first bit in the range | ||
77 | nbits is the number of bits per device control | ||
78 | |||
79 | - ti,dscr-devstate-stat-regs | ||
80 | This property describes the bitfields used to provide device state status | ||
81 | for device states controlled by the DSCR. Each tuple describes a range of | ||
82 | identical bitfields used to provide status for one or more devices (one | ||
83 | bitfield per device). The layout of each tuple is: | ||
84 | |||
85 | start_id num_ids reg enable disable start_bit nbits | ||
86 | |||
87 | Where: | ||
88 | start_id is device id for the first device status in the range | ||
89 | num_ids is the number of devices covered by the range | ||
90 | reg is the offset of the register holding the status bits | ||
91 | enable is the value indicating device is enabled | ||
92 | disable is the value indicating device is disabled | ||
93 | start_bit is the bit number of the first bit in the range | ||
94 | nbits is the number of bits per device status | ||
95 | |||
96 | - ti,dscr-privperm | ||
97 | Offset and default value for register used to set access privilege for | ||
98 | some SoC devices. | ||
99 | |||
100 | |||
101 | Example: | ||
102 | |||
103 | device-state-config-regs@2a80000 { | ||
104 | compatible = "ti,c64x+dscr"; | ||
105 | reg = <0x02a80000 0x41000>; | ||
106 | |||
107 | ti,dscr-devstat = <0>; | ||
108 | ti,dscr-silicon-rev = <8 28 0xf>; | ||
109 | ti,dscr-rmii-resets = <0x40020 0x00040000>; | ||
110 | |||
111 | ti,dscr-locked-regs = <0x40008 0x40004 0x0f0a0b00>; | ||
112 | ti,dscr-devstate-ctl-regs = | ||
113 | <0 12 0x40008 1 0 0 2 | ||
114 | 12 1 0x40008 3 0 30 2 | ||
115 | 13 2 0x4002c 1 0xffffffff 0 1>; | ||
116 | ti,dscr-devstate-stat-regs = | ||
117 | <0 10 0x40014 1 0 0 3 | ||
118 | 10 2 0x40018 1 0 0 3>; | ||
119 | |||
120 | ti,dscr-mac-fuse-regs = <0x700 1 2 3 4 | ||
121 | 0x704 5 6 0 0>; | ||
122 | |||
123 | ti,dscr-privperm = <0x41c 0xaaaaaaaa>; | ||
124 | |||
125 | ti,dscr-kick-regs = <0x38 0x83E70B13 | ||
126 | 0x3c 0x95A4F1E0>; | ||
127 | }; | ||
diff --git a/Documentation/devicetree/bindings/c6x/emifa.txt b/Documentation/devicetree/bindings/c6x/emifa.txt new file mode 100644 index 000000000000..0ff6e9b9a13f --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/emifa.txt | |||
@@ -0,0 +1,62 @@ | |||
1 | External Memory Interface | ||
2 | ------------------------- | ||
3 | |||
4 | The emifa node describes a simple external bus controller found on some C6X | ||
5 | SoCs. This interface provides external busses with a number of chip selects. | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible: must be "ti,c64x+emifa", "simple-bus" | ||
10 | - reg: register area base and size | ||
11 | - #address-cells: must be 2 (chip-select + offset) | ||
12 | - #size-cells: must be 1 | ||
13 | - ranges: mapping from EMIFA space to parent space | ||
14 | |||
15 | |||
16 | Optional properties: | ||
17 | |||
18 | - ti,dscr-dev-enable: Device ID if EMIF is enabled/disabled from DSCR | ||
19 | |||
20 | - ti,emifa-burst-priority: | ||
21 | Number of memory transfers after which the EMIF will elevate the priority | ||
22 | of the oldest command in the command FIFO. Setting this field to 255 | ||
23 | disables this feature, thereby allowing old commands to stay in the FIFO | ||
24 | indefinitely. | ||
25 | |||
26 | - ti,emifa-ce-config: | ||
27 | Configuration values for each of the supported chip selects. | ||
28 | |||
29 | Example: | ||
30 | |||
31 | emifa@70000000 { | ||
32 | compatible = "ti,c64x+emifa", "simple-bus"; | ||
33 | #address-cells = <2>; | ||
34 | #size-cells = <1>; | ||
35 | reg = <0x70000000 0x100>; | ||
36 | ranges = <0x2 0x0 0xa0000000 0x00000008 | ||
37 | 0x3 0x0 0xb0000000 0x00400000 | ||
38 | 0x4 0x0 0xc0000000 0x10000000 | ||
39 | 0x5 0x0 0xD0000000 0x10000000>; | ||
40 | |||
41 | ti,dscr-dev-enable = <13>; | ||
42 | ti,emifa-burst-priority = <255>; | ||
43 | ti,emifa-ce-config = <0x00240120 | ||
44 | 0x00240120 | ||
45 | 0x00240122 | ||
46 | 0x00240122>; | ||
47 | |||
48 | flash@3,0 { | ||
49 | #address-cells = <1>; | ||
50 | #size-cells = <1>; | ||
51 | compatible = "cfi-flash"; | ||
52 | reg = <0x3 0x0 0x400000>; | ||
53 | bank-width = <1>; | ||
54 | device-width = <1>; | ||
55 | partition@0 { | ||
56 | reg = <0x0 0x400000>; | ||
57 | label = "NOR"; | ||
58 | }; | ||
59 | }; | ||
60 | }; | ||
61 | |||
62 | This shows a flash chip attached to chip select 3. | ||
diff --git a/Documentation/devicetree/bindings/c6x/interrupt.txt b/Documentation/devicetree/bindings/c6x/interrupt.txt new file mode 100644 index 000000000000..42bb796cc4ad --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/interrupt.txt | |||
@@ -0,0 +1,104 @@ | |||
1 | C6X Interrupt Chips | ||
2 | ------------------- | ||
3 | |||
4 | * C64X+ Core Interrupt Controller | ||
5 | |||
6 | The core interrupt controller provides 16 prioritized interrupts to the | ||
7 | C64X+ core. Priority 0 and 1 are used for reset and NMI respectively. | ||
8 | Priority 2 and 3 are reserved. Priority 4-15 are used for interrupt | ||
9 | sources coming from outside the core. | ||
10 | |||
11 | Required properties: | ||
12 | -------------------- | ||
13 | - compatible: Should be "ti,c64x+core-pic"; | ||
14 | - #interrupt-cells: <1> | ||
15 | |||
16 | Interrupt Specifier Definition | ||
17 | ------------------------------ | ||
18 | Single cell specifying the core interrupt priority level (4-15) where | ||
19 | 4 is highest priority and 15 is lowest priority. | ||
20 | |||
21 | Example | ||
22 | ------- | ||
23 | core_pic: interrupt-controller@0 { | ||
24 | interrupt-controller; | ||
25 | #interrupt-cells = <1>; | ||
26 | compatible = "ti,c64x+core-pic"; | ||
27 | }; | ||
28 | |||
29 | |||
30 | |||
31 | * C64x+ Megamodule Interrupt Controller | ||
32 | |||
33 | The megamodule PIC consists of four interrupt multiplexers, each of which | ||
34 | combines up to 32 interrupt inputs into a single interrupt output which | ||
35 | may be cascaded into the core interrupt controller. The megamodule PIC | ||
36 | has a total of 12 outputs cascading into the core interrupt controller, | ||
37 | one for each core interrupt priority level. In addition to the combined | ||
38 | interrupt sources, individual megamodule interrupts may be cascaded to | ||
39 | the core interrupt controller. When an individual interrupt is cascaded, | ||
40 | it is no longer handled through a megamodule interrupt combiner and is | ||
41 | considered to have the core interrupt controller as the parent. | ||
42 | |||
43 | Required properties: | ||
44 | -------------------- | ||
45 | - compatible: "ti,c64x+megamod-pic" | ||
46 | - interrupt-controller | ||
47 | - #interrupt-cells: <1> | ||
48 | - reg: base address and size of register area | ||
49 | - interrupt-parent: must be core interrupt controller | ||
50 | - interrupts: This should have four cells; one for each interrupt combiner. | ||
51 | The cells contain the core priority interrupt to which the | ||
52 | corresponding combiner output is wired. | ||
53 | |||
54 | Optional properties: | ||
55 | -------------------- | ||
56 | - ti,c64x+megamod-pic-mux: Array of 12 cells corresponding to the 12 core | ||
57 | priority interrupts. The first cell corresponds to | ||
58 | core priority 4 and the last cell corresponds to | ||
59 | core priority 15. The value of each cell is the | ||
60 | megamodule interrupt source which is MUXed to | ||
61 | the core interrupt corresponding to the cell | ||
62 | position. Allowed values are 4 - 127. Mappings for | ||
63 | interrupts 0 - 3 (combined interrupt sources) are | ||
64 | ignored. | ||
65 | |||
66 | Interrupt Specifier Definition | ||
67 | ------------------------------ | ||
68 | Single cell specifying the megamodule interrupt source (4-127). Note that | ||
69 | interrupts mapped directly to the core with "ti,c64x+megamod-pic-mux" will | ||
70 | use the core interrupt controller as their parent and the specifier will | ||
71 | be the core priority level, not the megamodule interrupt number. | ||
72 | |||
73 | Examples | ||
74 | -------- | ||
75 | megamod_pic: interrupt-controller@1800000 { | ||
76 | compatible = "ti,c64x+megamod-pic"; | ||
77 | interrupt-controller; | ||
78 | #interrupt-cells = <1>; | ||
79 | reg = <0x1800000 0x1000>; | ||
80 | interrupt-parent = <&core_pic>; | ||
81 | interrupts = < 12 13 14 15 >; | ||
82 | }; | ||
83 | |||
84 | This is a minimal example where all individual interrupts go through a | ||
85 | combiner. Combiner-0 is mapped to core interrupt 12, combiner-1 is mapped | ||
86 | to interrupt 13, etc. | ||
87 | |||
88 | |||
89 | megamod_pic: interrupt-controller@1800000 { | ||
90 | compatible = "ti,c64x+megamod-pic"; | ||
91 | interrupt-controller; | ||
92 | #interrupt-cells = <1>; | ||
93 | reg = <0x1800000 0x1000>; | ||
94 | interrupt-parent = <&core_pic>; | ||
95 | interrupts = < 12 13 14 15 >; | ||
96 | ti,c64x+megamod-pic-mux = < 0 0 0 0 | ||
97 | 32 0 0 0 | ||
98 | 0 0 0 0 >; | ||
99 | }; | ||
100 | |||
101 | This is the same as the first example except that megamodule interrupt 32 is | ||
102 | mapped directly to core priority interrupt 8. The node using this interrupt | ||
103 | must set the core controller as its interrupt parent and use 8 in the | ||
104 | interrupt specifier value. | ||
diff --git a/Documentation/devicetree/bindings/c6x/soc.txt b/Documentation/devicetree/bindings/c6x/soc.txt new file mode 100644 index 000000000000..b1e4973b5769 --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/soc.txt | |||
@@ -0,0 +1,28 @@ | |||
1 | C6X System-on-Chip | ||
2 | ------------------ | ||
3 | |||
4 | Required properties: | ||
5 | |||
6 | - compatible: "simple-bus" | ||
7 | - #address-cells: must be 1 | ||
8 | - #size-cells: must be 1 | ||
9 | - ranges | ||
10 | |||
11 | Optional properties: | ||
12 | |||
13 | - model: specific SoC model | ||
14 | |||
15 | - nodes for IP blocks within SoC | ||
16 | |||
17 | |||
18 | Example: | ||
19 | |||
20 | soc { | ||
21 | compatible = "simple-bus"; | ||
22 | model = "tms320c6455"; | ||
23 | #address-cells = <1>; | ||
24 | #size-cells = <1>; | ||
25 | ranges; | ||
26 | |||
27 | ... | ||
28 | }; | ||
diff --git a/Documentation/devicetree/bindings/c6x/timer64.txt b/Documentation/devicetree/bindings/c6x/timer64.txt new file mode 100644 index 000000000000..95911fe70224 --- /dev/null +++ b/Documentation/devicetree/bindings/c6x/timer64.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | Timer64 | ||
2 | ------- | ||
3 | |||
4 | The timer64 node describes C6X event timers. | ||
5 | |||
6 | Required properties: | ||
7 | |||
8 | - compatible: must be "ti,c64x+timer64" | ||
9 | - reg: base address and size of register region | ||
10 | - interrupt-parent: interrupt controller | ||
11 | - interrupts: interrupt id | ||
12 | |||
13 | Optional properties: | ||
14 | |||
15 | - ti,dscr-dev-enable: Device ID used to enable timer IP through DSCR interface. | ||
16 | |||
17 | - ti,core-mask: on multi-core SoCs, bitmask of cores allowed to use this timer. | ||
18 | |||
19 | Example: | ||
20 | timer0: timer@25e0000 { | ||
21 | compatible = "ti,c64x+timer64"; | ||
22 | ti,core-mask = < 0x01 >; | ||
23 | reg = <0x25e0000 0x40>; | ||
24 | interrupt-parent = <&megamod_pic>; | ||
25 | interrupts = < 16 >; | ||
26 | }; | ||
diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt new file mode 100644 index 000000000000..a4cd273b2a67 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt | |||
@@ -0,0 +1,30 @@ | |||
1 | * ARM PrimeCell PL330 DMA Controller | ||
2 | |||
3 | The ARM PrimeCell PL330 DMA controller can move blocks of memory contents | ||
4 | between memory and peripherals or memory to memory. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible: should include both "arm,pl330" and "arm,primecell". | ||
8 | - reg: physical base address of the controller and length of memory mapped | ||
9 | region. | ||
10 | - interrupts: interrupt number to the cpu. | ||
11 | |||
12 | Example: | ||
13 | |||
14 | pdma0: pdma@12680000 { | ||
15 | compatible = "arm,pl330", "arm,primecell"; | ||
16 | reg = <0x12680000 0x1000>; | ||
17 | interrupts = <99>; | ||
18 | }; | ||
19 | |||
20 | Client drivers (device nodes requiring dma transfers from dev-to-mem or | ||
21 | mem-to-dev) should specify the DMA channel numbers using a two-value pair | ||
22 | as shown below. | ||
23 | |||
24 | [property name] = <[phandle of the dma controller] [dma request id]>; | ||
25 | |||
26 | where 'dma request id' is the dma request number which is connected | ||
27 | to the client controller. The 'property name' is recommended to be | ||
28 | of the form <name>-dma-channel. | ||
29 | |||
30 | Example: tx-dma-channel = <&pdma0 12>; | ||
diff --git a/Documentation/devicetree/bindings/gpio/gpio-samsung.txt b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt new file mode 100644 index 000000000000..8f50fe5e6c42 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt | |||
@@ -0,0 +1,40 @@ | |||
1 | Samsung Exynos4 GPIO Controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: Compatible property value should be "samsung,exynos4-gpio". | ||
5 | |||
6 | - reg: Physical base address of the controller and length of memory mapped | ||
7 | region. | ||
8 | |||
9 | - #gpio-cells: Should be 4. The syntax of the gpio specifier used by client nodes | ||
10 | should be the following with values derived from the SoC user manual. | ||
11 | <[phandle of the gpio controller node] | ||
12 | [pin number within the gpio controller] | ||
13 | [mux function] | ||
14 | [pull up/down] | ||
15 | [drive strength]> | ||
16 | |||
17 | Values for gpio specifier: | ||
18 | - Pin number: is a value between 0 and 7. | ||
19 | - Pull Up/Down: 0 - Pull Up/Down Disabled. | ||
20 | 1 - Pull Down Enabled. | ||
21 | 3 - Pull Up Enabled. | ||
22 | - Drive Strength: 0 - 1x, | ||
23 | 1 - 3x, | ||
24 | 2 - 2x, | ||
25 | 3 - 4x | ||
26 | |||
27 | - gpio-controller: Specifies that the node is a gpio controller. | ||
28 | - #address-cells: should be 1. | ||
29 | - #size-cells: should be 1. | ||
30 | |||
31 | Example: | ||
32 | |||
33 | gpa0: gpio-controller@11400000 { | ||
34 | #address-cells = <1>; | ||
35 | #size-cells = <1>; | ||
36 | compatible = "samsung,exynos4-gpio"; | ||
37 | reg = <0x11400000 0x20>; | ||
38 | #gpio-cells = <4>; | ||
39 | gpio-controller; | ||
40 | }; | ||
diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt new file mode 100644 index 000000000000..e42a2ee233e6 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt | |||
@@ -0,0 +1,22 @@ | |||
1 | * Synopsys DesignWare I2C | ||
2 | |||
3 | Required properties : | ||
4 | |||
5 | - compatible : should be "snps,designware-i2c" | ||
6 | - reg : Offset and length of the register set for the device | ||
7 | - interrupts : <IRQ> where IRQ is the interrupt number. | ||
8 | |||
9 | Recommended properties : | ||
10 | |||
11 | - clock-frequency : desired I2C bus clock frequency in Hz. | ||
12 | |||
13 | Example : | ||
14 | |||
15 | i2c@f0000 { | ||
16 | #address-cells = <1>; | ||
17 | #size-cells = <0>; | ||
18 | compatible = "snps,designware-i2c"; | ||
19 | reg = <0xf0000 0x1000>; | ||
20 | interrupts = <11>; | ||
21 | clock-frequency = <400000>; | ||
22 | }; | ||
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt new file mode 100644 index 000000000000..1a85f986961b --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt | |||
@@ -0,0 +1,58 @@ | |||
1 | This is a list of trivial i2c devices that have simple device tree | ||
2 | bindings, consisting only of a compatible field, an address and | ||
3 | possibly an interrupt line. | ||
4 | |||
5 | If a device needs more specific bindings, such as properties to | ||
6 | describe some aspect of it, there needs to be a specific binding | ||
7 | document for it just like any other devices. | ||
8 | |||
9 | |||
10 | Compatible Vendor / Chip | ||
11 | ========== ============= | ||
12 | ad,ad7414 SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin | ||
13 | ad,adm9240 ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems | ||
14 | adi,adt7461 +/-1C TDM Extended Temp Range I.C | ||
15 | adt7461 +/-1C TDM Extended Temp Range I.C | ||
16 | at,24c08 i2c serial eeprom (24cxx) | ||
17 | atmel,24c02 i2c serial eeprom (24cxx) | ||
18 | catalyst,24c32 i2c serial eeprom | ||
19 | dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock | ||
20 | dallas,ds1338 I2C RTC with 56-Byte NV RAM | ||
21 | dallas,ds1339 I2C Serial Real-Time Clock | ||
22 | dallas,ds1340 I2C RTC with Trickle Charger | ||
23 | dallas,ds1374 I2C, 32-Bit Binary Counter Watchdog RTC with Trickle Charger and Reset Input/Output | ||
24 | dallas,ds1631 High-Precision Digital Thermometer | ||
25 | dallas,ds1682 Total-Elapsed-Time Recorder with Alarm | ||
26 | dallas,ds1775 Tiny Digital Thermometer and Thermostat | ||
27 | dallas,ds3232 Extremely Accurate I²C RTC with Integrated Crystal and SRAM | ||
28 | dallas,ds4510 CPU Supervisor with Nonvolatile Memory and Programmable I/O | ||
29 | dallas,ds75 Digital Thermometer and Thermostat | ||
30 | dialog,da9053 DA9053: flexible system level PMIC with multicore support | ||
31 | epson,rx8025 High-Stability. I2C-Bus INTERFACE REAL TIME CLOCK MODULE | ||
32 | epson,rx8581 I2C-BUS INTERFACE REAL TIME CLOCK MODULE | ||
33 | fsl,mag3110 MAG3110: Xtrinsic High Accuracy, 3D Magnetometer | ||
34 | fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51 | ||
35 | fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer | ||
36 | fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller | ||
37 | fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec | ||
38 | maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator | ||
39 | maxim,max1237 Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs | ||
40 | maxim,max6625 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface | ||
41 | mc,rv3029c2 Real Time Clock Module with I2C-Bus | ||
42 | national,lm75 I2C TEMP SENSOR | ||
43 | national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor | ||
44 | national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface | ||
45 | nxp,pca9556 Octal SMBus and I2C registered interface | ||
46 | nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset | ||
47 | nxp,pcf8563 Real-time clock/calendar | ||
48 | ovti,ov5642 OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus | ||
49 | pericom,pt7c4338 Real-time Clock Module | ||
50 | plx,pex8648 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch | ||
51 | ramtron,24c64 i2c serial eeprom (24cxx) | ||
52 | ricoh,rs5c372a I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC | ||
53 | samsung,24ad0xd1 S524AD0XF1 (128K/256K-bit Serial EEPROM for Low Power) | ||
54 | st-micro,24c256 i2c serial eeprom (24cxx) | ||
55 | stm,m41t00 Serial Access TIMEKEEPER | ||
56 | stm,m41t62 Serial real-time clock (RTC) with alarm | ||
57 | stm,m41t80 M41T80 - SERIAL ACCESS RTC WITH ALARMS | ||
58 | ti,tsc2003 I2C Touch-Screen Controller | ||
diff --git a/Documentation/devicetree/bindings/input/samsung-keypad.txt b/Documentation/devicetree/bindings/input/samsung-keypad.txt new file mode 100644 index 000000000000..ce3e394c0e64 --- /dev/null +++ b/Documentation/devicetree/bindings/input/samsung-keypad.txt | |||
@@ -0,0 +1,88 @@ | |||
1 | * Samsung's Keypad Controller device tree bindings | ||
2 | |||
3 | Samsung's Keypad controller is used to interface a SoC with a matrix-type | ||
4 | keypad device. The keypad controller supports multiple row and column lines. | ||
5 | A key can be placed at each intersection of a unique row and a unique column. | ||
6 | The keypad controller can sense a key-press and key-release and report the | ||
7 | event using an interrupt to the cpu. | ||
8 | |||
9 | Required SoC Specific Properties: | ||
10 | - compatible: should be one of the following | ||
11 | - "samsung,s3c6410-keypad": For controllers compatible with s3c6410 keypad | ||
12 | controller. | ||
13 | - "samsung,s5pv210-keypad": For controllers compatible with s5pv210 keypad | ||
14 | controller. | ||
15 | |||
16 | - reg: physical base address of the controller and length of memory mapped | ||
17 | region. | ||
18 | |||
19 | - interrupts: The interrupt number to the cpu. | ||
20 | |||
21 | Required Board Specific Properties: | ||
22 | - samsung,keypad-num-rows: Number of row lines connected to the keypad | ||
23 | controller. | ||
24 | |||
25 | - samsung,keypad-num-columns: Number of column lines connected to the | ||
26 | keypad controller. | ||
27 | |||
28 | - row-gpios: List of gpios used as row lines. The gpio specifier for | ||
29 | this property depends on the gpio controller to which these row lines | ||
30 | are connected. | ||
31 | |||
32 | - col-gpios: List of gpios used as column lines. The gpio specifier for | ||
33 | this property depends on the gpio controller to which these column | ||
34 | lines are connected. | ||
35 | |||
36 | - Keys represented as child nodes: Each key connected to the keypad | ||
37 | controller is represented as a child node to the keypad controller | ||
38 | device node and should include the following properties. | ||
39 | - keypad,row: the row number to which the key is connected. | ||
40 | - keypad,column: the column number to which the key is connected. | ||
41 | - linux,code: the key-code to be reported when the key is pressed | ||
42 | and released. | ||
43 | |||
44 | Optional Properties specific to linux: | ||
45 | - linux,keypad-no-autorepeat: do not enable autorepeat feature. | ||
46 | - linux,keypad-wakeup: use any event on keypad as wakeup event. | ||
47 | |||
48 | |||
49 | Example: | ||
50 | keypad@100A0000 { | ||
51 | compatible = "samsung,s5pv210-keypad"; | ||
52 | reg = <0x100A0000 0x100>; | ||
53 | interrupts = <173>; | ||
54 | samsung,keypad-num-rows = <2>; | ||
55 | samsung,keypad-num-columns = <8>; | ||
56 | linux,input-no-autorepeat; | ||
57 | linux,input-wakeup; | ||
58 | |||
59 | row-gpios = <&gpx2 0 3 3 0 | ||
60 | &gpx2 1 3 3 0>; | ||
61 | |||
62 | col-gpios = <&gpx1 0 3 0 0 | ||
63 | &gpx1 1 3 0 0 | ||
64 | &gpx1 2 3 0 0 | ||
65 | &gpx1 3 3 0 0 | ||
66 | &gpx1 4 3 0 0 | ||
67 | &gpx1 5 3 0 0 | ||
68 | &gpx1 6 3 0 0 | ||
69 | &gpx1 7 3 0 0>; | ||
70 | |||
71 | key_1 { | ||
72 | keypad,row = <0>; | ||
73 | keypad,column = <3>; | ||
74 | linux,code = <2>; | ||
75 | }; | ||
76 | |||
77 | key_2 { | ||
78 | keypad,row = <0>; | ||
79 | keypad,column = <4>; | ||
80 | linux,code = <3>; | ||
81 | }; | ||
82 | |||
83 | key_3 { | ||
84 | keypad,row = <0>; | ||
85 | keypad,column = <5>; | ||
86 | linux,code = <4>; | ||
87 | }; | ||
88 | }; | ||
diff --git a/Documentation/devicetree/bindings/input/tegra-kbc.txt b/Documentation/devicetree/bindings/input/tegra-kbc.txt new file mode 100644 index 000000000000..5ecfa99089b4 --- /dev/null +++ b/Documentation/devicetree/bindings/input/tegra-kbc.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | * Tegra keyboard controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: "nvidia,tegra20-kbc" | ||
5 | |||
6 | Optional properties: | ||
7 | - debounce-delay: delay in milliseconds per row scan for debouncing | ||
8 | - repeat-delay: delay in milliseconds before repeat starts | ||
9 | - ghost-filter: enable ghost filtering for this device | ||
10 | - wakeup-source: configure keyboard as a wakeup source for suspend/resume | ||
11 | |||
12 | Example: | ||
13 | |||
14 | keyboard: keyboard { | ||
15 | compatible = "nvidia,tegra20-kbc"; | ||
16 | reg = <0x7000e200 0x100>; | ||
17 | ghost-filter; | ||
18 | }; | ||
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt new file mode 100644 index 000000000000..719f4dc58df7 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt | |||
@@ -0,0 +1,44 @@ | |||
1 | GPIO assisted NAND flash | ||
2 | |||
3 | The GPIO assisted NAND flash uses a memory mapped interface to | ||
4 | read/write the NAND commands and data and GPIO pins for the control | ||
5 | signals. | ||
6 | |||
7 | Required properties: | ||
8 | - compatible : "gpio-control-nand" | ||
9 | - reg : should specify localbus chip select and size used for the chip. The | ||
10 | resource describes the data bus connected to the NAND flash and all accesses | ||
11 | are made in native endianness. | ||
12 | - #address-cells, #size-cells : Must be present if the device has sub-nodes | ||
13 | representing partitions. | ||
14 | - gpios : specifies the gpio pins to control the NAND device. nwp is an | ||
15 | optional gpio and may be set to 0 if not present. | ||
16 | |||
17 | Optional properties: | ||
18 | - bank-width : Width (in bytes) of the device. If not present, the width | ||
19 | defaults to 1 byte. | ||
20 | - chip-delay : chip dependent delay for transferring data from array to | ||
21 | read registers (tR). If not present then a default of 20us is used. | ||
22 | - gpio-control-nand,io-sync-reg : A 64-bit physical address for a read | ||
23 | location used to guard against bus reordering with regards to accesses to | ||
24 | the GPIO's and the NAND flash data bus. If present, then after changing | ||
25 | GPIO state and before and after command byte writes, this register will be | ||
26 | read to ensure that the GPIO accesses have completed. | ||
27 | |||
28 | Examples: | ||
29 | |||
30 | gpio-nand@1,0 { | ||
31 | compatible = "gpio-control-nand"; | ||
32 | reg = <1 0x0000 0x2>; | ||
33 | #address-cells = <1>; | ||
34 | #size-cells = <1>; | ||
35 | gpios = <&banka 1 0 /* rdy */ | ||
36 | &banka 2 0 /* nce */ | ||
37 | &banka 3 0 /* ale */ | ||
38 | &banka 4 0 /* cle */ | ||
39 | 0 /* nwp */>; | ||
40 | |||
41 | partition@0 { | ||
42 | ... | ||
43 | }; | ||
44 | }; | ||
diff --git a/Documentation/devicetree/bindings/net/calxeda-xgmac.txt b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt new file mode 100644 index 000000000000..411727a3f82d --- /dev/null +++ b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | * Calxeda Highbank 10Gb XGMAC Ethernet | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : Should be "calxeda,hb-xgmac" | ||
5 | - reg : Address and length of the register set for the device | ||
6 | - interrupts : Should contain 3 xgmac interrupts. The 1st is main interrupt. | ||
7 | The 2nd is pwr mgt interrupt. The 3rd is low power state interrupt. | ||
8 | |||
9 | Example: | ||
10 | |||
11 | ethernet@fff50000 { | ||
12 | compatible = "calxeda,hb-xgmac"; | ||
13 | reg = <0xfff50000 0x1000>; | ||
14 | interrupts = <0 77 4 0 78 4 0 79 4>; | ||
15 | }; | ||
diff --git a/Documentation/devicetree/bindings/net/can/cc770.txt b/Documentation/devicetree/bindings/net/can/cc770.txt new file mode 100644 index 000000000000..77027bf6460a --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/cc770.txt | |||
@@ -0,0 +1,53 @@ | |||
1 | Memory mapped Bosch CC770 and Intel AN82527 CAN controller | ||
2 | |||
3 | Note: The CC770 is a CAN controller from Bosch, which is 100% | ||
4 | compatible with the old AN82527 from Intel, but with "bugs" being fixed. | ||
5 | |||
6 | Required properties: | ||
7 | |||
8 | - compatible : should be "bosch,cc770" for the CC770 and "intc,82527" | ||
9 | for the AN82527. | ||
10 | |||
11 | - reg : should specify the chip select, address offset and size required | ||
12 | to map the registers of the controller. The size is usually 0x80. | ||
13 | |||
14 | - interrupts : property with a value describing the interrupt source | ||
15 | (number and sensitivity) required for the controller. | ||
16 | |||
17 | Optional properties: | ||
18 | |||
19 | - bosch,external-clock-frequency : frequency of the external oscillator | ||
20 | clock in Hz. Note that the internal clock frequency used by the | ||
21 | controller is half of that value. If not specified, a default | ||
22 | value of 16000000 (16 MHz) is used. | ||
23 | |||
24 | - bosch,clock-out-frequency : clock frequency in Hz on the CLKOUT pin. | ||
25 | If not specified or if the specified value is 0, the CLKOUT pin | ||
26 | will be disabled. | ||
27 | |||
28 | - bosch,slew-rate : slew rate of the CLKOUT signal. If not specified, | ||
29 | a reasonable value will be calculated. | ||
30 | |||
31 | - bosch,disconnect-rx0-input : see data sheet. | ||
32 | |||
33 | - bosch,disconnect-rx1-input : see data sheet. | ||
34 | |||
35 | - bosch,disconnect-tx1-output : see data sheet. | ||
36 | |||
37 | - bosch,polarity-dominant : see data sheet. | ||
38 | |||
39 | - bosch,divide-memory-clock : see data sheet. | ||
40 | |||
41 | - bosch,iso-low-speed-mux : see data sheet. | ||
42 | |||
43 | For further information, please have a look at the CC770 or AN82527 data sheet. | ||
44 | |||
45 | Examples: | ||
46 | |||
47 | can@3,100 { | ||
48 | compatible = "bosch,cc770"; | ||
49 | reg = <3 0x100 0x80>; | ||
50 | interrupts = <2 0>; | ||
51 | interrupt-parent = <&mpic>; | ||
52 | bosch,external-clock-frequency = <16000000>; | ||
53 | }; | ||
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt new file mode 100644 index 000000000000..44afa0e5057d --- /dev/null +++ b/Documentation/devicetree/bindings/net/macb.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | * Cadence MACB/GEM Ethernet controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: Should be "cdns,[<chip>-]{macb|gem}" | ||
5 | Use "cdns,at91sam9260-macb" for Atmel at91sam9260 and at91sam9263 SoCs. | ||
6 | Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". | ||
7 | Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on | ||
8 | the Cadence GEM, or the generic form: "cdns,gem". | ||
9 | - reg: Address and length of the register set for the device | ||
10 | - interrupts: Should contain macb interrupt | ||
11 | - phy-mode: String, operation mode of the PHY interface. | ||
12 | Supported values are: "mii", "rmii", "gmii", "rgmii". | ||
13 | |||
14 | Optional properties: | ||
15 | - local-mac-address: 6 bytes, mac address | ||
16 | |||
17 | Examples: | ||
18 | |||
19 | macb0: ethernet@fffc4000 { | ||
20 | compatible = "cdns,at32ap7000-macb"; | ||
21 | reg = <0xfffc4000 0x4000>; | ||
22 | interrupts = <21>; | ||
23 | phy-mode = "rmii"; | ||
24 | local-mac-address = [3a 0e 03 04 05 06]; | ||
25 | }; | ||
diff --git a/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt b/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt new file mode 100644 index 000000000000..5aeee53ff9f4 --- /dev/null +++ b/Documentation/devicetree/bindings/nvec/nvec_nvidia.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | NVIDIA compliant embedded controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be "nvidia,nvec". | ||
5 | - reg : the iomem of the i2c slave controller | ||
6 | - interrupts : the interrupt line of the i2c slave controller | ||
7 | - clock-frequency : the frequency of the i2c bus | ||
8 | - gpios : the gpio used for ec request | ||
9 | - slave-addr: the i2c address of the slave controller | ||
diff --git a/Documentation/devicetree/bindings/power_supply/olpc_battery.txt b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt new file mode 100644 index 000000000000..c8901b3992d9 --- /dev/null +++ b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt | |||
@@ -0,0 +1,5 @@ | |||
1 | OLPC battery | ||
2 | ~~~~~~~~~~~~ | ||
3 | |||
4 | Required properties: | ||
5 | - compatible : "olpc,xo1-battery" | ||
diff --git a/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt new file mode 100644 index 000000000000..c40e8926facf --- /dev/null +++ b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt | |||
@@ -0,0 +1,23 @@ | |||
1 | SBS sbs-battery | ||
2 | ~~~~~~~~~~ | ||
3 | |||
4 | Required properties : | ||
5 | - compatible : "sbs,sbs-battery" | ||
6 | |||
7 | Optional properties : | ||
8 | - sbs,i2c-retry-count : The number of times to retry i2c transactions on i2c | ||
9 | IO failure. | ||
10 | - sbs,poll-retry-count : The number of times to try looking for new status | ||
11 | after an external change notification. | ||
12 | - sbs,battery-detect-gpios : The gpio which signals battery detection and | ||
13 | a flag specifying its polarity. | ||
14 | |||
15 | Example: | ||
16 | |||
17 | bq20z75@b { | ||
18 | compatible = "sbs,sbs-battery"; | ||
19 | reg = < 0xb >; | ||
20 | sbs,i2c-retry-count = <2>; | ||
21 | sbs,poll-retry-count = <10>; | ||
22 | sbs,battery-detect-gpios = <&gpio-controller 122 1>; | ||
23 | }; | ||
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt new file mode 100644 index 000000000000..b9a8a2bcfae7 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt | |||
@@ -0,0 +1,163 @@ | |||
1 | Message unit node: | ||
2 | |||
3 | For SRIO controllers that implement the message unit as part of the controller | ||
4 | this node is required. For devices with RMAN this node should NOT exist. The | ||
5 | node is composed of three types of sub-nodes ("fsl-srio-msg-unit", | ||
6 | "fsl-srio-dbell-unit" and "fsl-srio-port-write-unit"). | ||
7 | |||
8 | See srio.txt for more details about generic SRIO controller details. | ||
9 | |||
10 | - compatible | ||
11 | Usage: required | ||
12 | Value type: <string> | ||
13 | Definition: Must include "fsl,srio-rmu-vX.Y", "fsl,srio-rmu". | ||
14 | |||
15 | The version X.Y should match the general SRIO controller's IP Block | ||
16 | revision register's Major(X) and Minor (Y) value. | ||
17 | |||
18 | - reg | ||
19 | Usage: required | ||
20 | Value type: <prop-encoded-array> | ||
21 | Definition: A standard property. Specifies the physical address and | ||
22 | length of the SRIO configuration registers for message units | ||
23 | and doorbell units. | ||
24 | |||
25 | - fsl,liodn | ||
26 | Usage: optional-but-recommended (for devices with PAMU) | ||
27 | Value type: <prop-encoded-array> | ||
28 | Definition: The logical I/O device number for the PAMU (IOMMU) to be | ||
29 | correctly configured for SRIO accesses. The property should | ||
30 | not exist on devices that do not support PAMU. | ||
31 | |||
32 | The LIODN value is associated with all RMU transactions | ||
33 | (msg-unit, doorbell, port-write). | ||
34 | |||
35 | Sub-Nodes for RMU: The RMU node is composed of multiple sub-nodes that | ||
36 | correspond to the actual sub-controllers in the RMU. The manual for a given | ||
37 | SoC will detail which and how many of these sub-controllers are implemented. | ||
38 | |||
39 | Message Unit: | ||
40 | |||
41 | - compatible | ||
42 | Usage: required | ||
43 | Value type: <string> | ||
44 | Definition: Must include "fsl,srio-msg-unit-vX.Y", "fsl,srio-msg-unit". | ||
45 | |||
46 | The version X.Y should match the general SRIO controller's IP Block | ||
47 | revision register's Major(X) and Minor (Y) value. | ||
48 | |||
49 | - reg | ||
50 | Usage: required | ||
51 | Value type: <prop-encoded-array> | ||
52 | Definition: A standard property. Specifies the physical address and | ||
53 | length of the SRIO configuration registers for message units | ||
54 | and doorbell units. | ||
55 | |||
56 | - interrupts | ||
57 | Usage: required | ||
58 | Value type: <prop-encoded-array> | ||
59 | Definition: Specifies the interrupts generated by this device. The | ||
60 | value of the interrupts property consists of one interrupt | ||
61 | specifier. The format of the specifier is defined by the | ||
62 | binding document describing the node's interrupt parent. | ||
63 | |||
64 | A pair of IRQs are specified in this property. The first | ||
65 | element is associated with the transmit (TX) interrupt and the | ||
66 | second element is associated with the receive (RX) interrupt. | ||
67 | |||
68 | Doorbell Unit: | ||
69 | |||
70 | - compatible | ||
71 | Usage: required | ||
72 | Value type: <string> | ||
73 | Definition: Must include: | ||
74 | "fsl,srio-dbell-unit-vX.Y", "fsl,srio-dbell-unit" | ||
75 | |||
76 | The version X.Y should match the general SRIO controller's IP Block | ||
77 | revision register's Major(X) and Minor (Y) value. | ||
78 | |||
79 | - reg | ||
80 | Usage: required | ||
81 | Value type: <prop-encoded-array> | ||
82 | Definition: A standard property. Specifies the physical address and | ||
83 | length of the SRIO configuration registers for message units | ||
84 | and doorbell units. | ||
85 | |||
86 | - interrupts | ||
87 | Usage: required | ||
88 | Value type: <prop-encoded-array> | ||
89 | Definition: Specifies the interrupts generated by this device. The | ||
90 | value of the interrupts property consists of one interrupt | ||
91 | specifier. The format of the specifier is defined by the | ||
92 | binding document describing the node's interrupt parent. | ||
93 | |||
94 | A pair of IRQs are specified in this property. The first | ||
95 | element is associated with the transmit (TX) interrupt and the | ||
96 | second element is associated with the receive (RX) interrupt. | ||
97 | |||
98 | Port-Write Unit: | ||
99 | |||
100 | - compatible | ||
101 | Usage: required | ||
102 | Value type: <string> | ||
103 | Definition: Must include: | ||
104 | "fsl,srio-port-write-unit-vX.Y", "fsl,srio-port-write-unit" | ||
105 | |||
106 | The version X.Y should match the general SRIO controller's IP Block | ||
107 | revision register's Major(X) and Minor (Y) value. | ||
108 | |||
109 | - reg | ||
110 | Usage: required | ||
111 | Value type: <prop-encoded-array> | ||
112 | Definition: A standard property. Specifies the physical address and | ||
113 | length of the SRIO configuration registers for message units | ||
114 | and doorbell units. | ||
115 | |||
116 | - interrupts | ||
117 | Usage: required | ||
118 | Value type: <prop-encoded-array> | ||
119 | Definition: Specifies the interrupts generated by this device. The | ||
120 | value of the interrupts property consists of one interrupt | ||
121 | specifier. The format of the specifier is defined by the | ||
122 | binding document describing the node's interrupt parent. | ||
123 | |||
124 | A single IRQ that handles port-write conditions is | ||
125 | specified by this property. (Typically shared with error). | ||
126 | |||
127 | Note: All other standard properties (see the ePAPR) are allowed | ||
128 | but are optional. | ||
129 | |||
130 | Example: | ||
131 | rmu: rmu@d3000 { | ||
132 | compatible = "fsl,srio-rmu"; | ||
133 | reg = <0xd3000 0x400>; | ||
134 | ranges = <0x0 0xd3000 0x400>; | ||
135 | fsl,liodn = <0xc8>; | ||
136 | |||
137 | message-unit@0 { | ||
138 | compatible = "fsl,srio-msg-unit"; | ||
139 | reg = <0x0 0x100>; | ||
140 | interrupts = < | ||
141 | 60 2 0 0 /* msg1_tx_irq */ | ||
142 | 61 2 0 0>;/* msg1_rx_irq */ | ||
143 | }; | ||
144 | message-unit@100 { | ||
145 | compatible = "fsl,srio-msg-unit"; | ||
146 | reg = <0x100 0x100>; | ||
147 | interrupts = < | ||
148 | 62 2 0 0 /* msg2_tx_irq */ | ||
149 | 63 2 0 0>;/* msg2_rx_irq */ | ||
150 | }; | ||
151 | doorbell-unit@400 { | ||
152 | compatible = "fsl,srio-dbell-unit"; | ||
153 | reg = <0x400 0x80>; | ||
154 | interrupts = < | ||
155 | 56 2 0 0 /* bell_outb_irq */ | ||
156 | 57 2 0 0>;/* bell_inb_irq */ | ||
157 | }; | ||
158 | port-write-unit@4e0 { | ||
159 | compatible = "fsl,srio-port-write-unit"; | ||
160 | reg = <0x4e0 0x20>; | ||
161 | interrupts = <16 2 1 11>; | ||
162 | }; | ||
163 | }; | ||
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt new file mode 100644 index 000000000000..b039bcbee134 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt | |||
@@ -0,0 +1,103 @@ | |||
1 | * Freescale Serial RapidIO (SRIO) Controller | ||
2 | |||
3 | RapidIO port node: | ||
4 | Properties: | ||
5 | - compatible | ||
6 | Usage: required | ||
7 | Value type: <string> | ||
8 | Definition: Must include "fsl,srio" for IP blocks with IP Block | ||
9 | Revision Register (SRIO IPBRR1) Major ID equal to 0x01c0. | ||
10 | |||
11 | Optionally, a compatible string of "fsl,srio-vX.Y" where X is Major | ||
12 | version in IP Block Revision Register and Y is Minor version. If this | ||
13 | compatible is provided it should be ordered before "fsl,srio". | ||
14 | |||
15 | - reg | ||
16 | Usage: required | ||
17 | Value type: <prop-encoded-array> | ||
18 | Definition: A standard property. Specifies the physical address and | ||
19 | length of the SRIO configuration registers. The size should | ||
20 | be set to 0x11000. | ||
21 | |||
22 | - interrupts | ||
23 | Usage: required | ||
24 | Value type: <prop-encoded-array> | ||
25 | Definition: Specifies the interrupts generated by this device. The | ||
26 | value of the interrupts property consists of one interrupt | ||
27 | specifier. The format of the specifier is defined by the | ||
28 | binding document describing the node's interrupt parent. | ||
29 | |||
30 | A single IRQ that handles error conditions is specified by this | ||
31 | property. (Typically shared with port-write). | ||
32 | |||
33 | - fsl,srio-rmu-handle: | ||
34 | Usage: required if rmu node is defined | ||
35 | Value type: <phandle> | ||
36 | Definition: A single <phandle> value that points to the RMU. | ||
37 | (See srio-rmu.txt for more details on RMU node binding) | ||
38 | |||
39 | Port Child Nodes: There should be a port child node for each port that exists in | ||
40 | the controller. The ports are numbered starting at one (1) and should have | ||
41 | the following properties: | ||
42 | |||
43 | - cell-index | ||
44 | Usage: required | ||
45 | Value type: <u32> | ||
46 | Definition: A standard property. Matches the port id. | ||
47 | |||
48 | - ranges | ||
49 | Usage: required if local access windows are present | ||
50 | Value type: <prop-encoded-array> | ||
51 | Definition: A standard property. Utilized to describe the memory mapped | ||
52 | IO space utilized by the controller. This corresponds to the | ||
53 | setting of the local access windows that are targeted to this | ||
54 | SRIO port. | ||
55 | |||
56 | - fsl,liodn | ||
57 | Usage: optional-but-recommended (for devices with PAMU) | ||
58 | Value type: <prop-encoded-array> | ||
59 | Definition: The logical I/O device number for the PAMU (IOMMU) to be | ||
60 | correctly configured for SRIO accesses. The property should | ||
61 | not exist on devices that do not support PAMU. | ||
62 | |||
63 | For HW (e.g., the P4080) that only supports a LIODN for both | ||
64 | memory and maintenance transactions then a single LIODN is | ||
65 | represented in the property for both transactions. | ||
66 | |||
67 | For HW (e.g., the P304x/P5020, etc) that supports an LIODN for | ||
68 | memory transactions and a unique LIODN for maintenance | ||
69 | transactions then a pair of LIODNs are represented in the | ||
70 | property. Within the pair, the first element represents the | ||
71 | LIODN associated with memory transactions and the second element | ||
72 | represents the LIODN associated with maintenance transactions | ||
73 | for the port. | ||
74 | |||
75 | Note: All other standard properties (see ePAPR) are allowed but are optional. | ||
76 | |||
77 | Example: | ||
78 | |||
79 | rapidio: rapidio@ffe0c0000 { | ||
80 | #address-cells = <2>; | ||
81 | #size-cells = <2>; | ||
82 | reg = <0xf 0xfe0c0000 0 0x11000>; | ||
83 | compatible = "fsl,srio"; | ||
84 | interrupts = <16 2 1 11>; /* err_irq */ | ||
85 | fsl,srio-rmu-handle = <&rmu>; | ||
86 | ranges; | ||
87 | |||
88 | port1 { | ||
89 | cell-index = <1>; | ||
90 | #address-cells = <2>; | ||
91 | #size-cells = <2>; | ||
92 | fsl,liodn = <34>; | ||
93 | ranges = <0 0 0xc 0x20000000 0 0x10000000>; | ||
94 | }; | ||
95 | |||
96 | port2 { | ||
97 | cell-index = <2>; | ||
98 | #address-cells = <2>; | ||
99 | #size-cells = <2>; | ||
100 | fsl,liodn = <48>; | ||
101 | ranges = <0 0 0xc 0x30000000 0 0x10000000>; | ||
102 | }; | ||
103 | }; | ||
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.txt b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt new file mode 100644 index 000000000000..9cf57fd042d2 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | Fixed Voltage regulators | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: Must be "regulator-fixed"; | ||
5 | |||
6 | Optional properties: | ||
7 | - gpio: gpio to use for enable control | ||
8 | - startup-delay-us: startup time in microseconds | ||
9 | - enable-active-high: Polarity of GPIO is Active high | ||
10 | If this property is missing, the default assumed is Active low. | ||
11 | |||
12 | Any property defined as part of the core regulator | ||
13 | binding, defined in regulator.txt, can also be used. | ||
14 | However a fixed voltage regulator is expected to have the | ||
15 | regulator-min-microvolt and regulator-max-microvolt | ||
16 | to be the same. | ||
17 | |||
18 | Example: | ||
19 | |||
20 | abc: fixedregulator@0 { | ||
21 | compatible = "regulator-fixed"; | ||
22 | regulator-name = "fixed-supply"; | ||
23 | regulator-min-microvolt = <1800000>; | ||
24 | regulator-max-microvolt = <1800000>; | ||
25 | gpio = <&gpio1 16 0>; | ||
26 | startup-delay-us = <70000>; | ||
27 | enable-active-high; | ||
28 | regulator-boot-on; | ||
29 | }; | ||
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt new file mode 100644 index 000000000000..5b7a408acdaa --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/regulator.txt | |||
@@ -0,0 +1,54 @@ | |||
1 | Voltage/Current Regulators | ||
2 | |||
3 | Optional properties: | ||
4 | - regulator-name: A string used as a descriptive name for regulator outputs | ||
5 | - regulator-min-microvolt: smallest voltage consumers may set | ||
6 | - regulator-max-microvolt: largest voltage consumers may set | ||
7 | - regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops | ||
8 | - regulator-min-microamp: smallest current consumers may set | ||
9 | - regulator-max-microamp: largest current consumers may set | ||
10 | - regulator-always-on: boolean, regulator should never be disabled | ||
11 | - regulator-boot-on: bootloader/firmware enabled regulator | ||
12 | - <name>-supply: phandle to the parent supply/regulator node | ||
13 | |||
14 | Example: | ||
15 | |||
16 | xyzreg: regulator@0 { | ||
17 | regulator-min-microvolt = <1000000>; | ||
18 | regulator-max-microvolt = <2500000>; | ||
19 | regulator-always-on; | ||
20 | vin-supply = <&vin>; | ||
21 | }; | ||
22 | |||
23 | Regulator Consumers: | ||
24 | Consumer nodes can reference one or more of its supplies/ | ||
25 | regulators using the below bindings. | ||
26 | |||
27 | - <name>-supply: phandle to the regulator node | ||
28 | |||
29 | These are the same bindings that a regulator in the above | ||
30 | example used to reference its own supply, in which case | ||
31 | it's just seen as a special case of a regulator being a | ||
32 | consumer itself. | ||
33 | |||
34 | Example of a consumer device node (mmc) referencing two | ||
35 | regulators (twl_reg1 and twl_reg2), | ||
36 | |||
37 | twl_reg1: regulator@0 { | ||
38 | ... | ||
39 | ... | ||
40 | ... | ||
41 | }; | ||
42 | |||
43 | twl_reg2: regulator@1 { | ||
44 | ... | ||
45 | ... | ||
46 | ... | ||
47 | }; | ||
48 | |||
49 | mmc: mmc@0x0 { | ||
50 | ... | ||
51 | ... | ||
52 | vmmc-supply = <&twl_reg1>; | ||
53 | vmmcaux-supply = <&twl_reg2>; | ||
54 | }; | ||
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt new file mode 100644 index 000000000000..90ec45fd33ec --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt | |||
@@ -0,0 +1,20 @@ | |||
1 | * Samsung's S3C Real Time Clock controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible: should be one of the following. | ||
5 | * "samsung,s3c2410-rtc" - for controllers compatible with s3c2410 rtc. | ||
6 | * "samsung,s3c6410-rtc" - for controllers compatible with s3c6410 rtc. | ||
7 | - reg: physical base address of the controller and length of memory mapped | ||
8 | region. | ||
9 | - interrupts: Two interrupt numbers to the cpu should be specified. First | ||
10 | interrupt number is the rtc alarm interrupt and second interrupt number | ||
11 | is the rtc tick interrupt. The number of cells representing an interrupt | ||
12 | depends on the parent interrupt controller. | ||
13 | |||
14 | Example: | ||
15 | |||
16 | rtc@10070000 { | ||
17 | compatible = "samsung,s3c6410-rtc"; | ||
18 | reg = <0x10070000 0x100>; | ||
19 | interrupts = <44 0 45 0>; | ||
20 | }; | ||
diff --git a/Documentation/devicetree/bindings/rtc/twl-rtc.txt b/Documentation/devicetree/bindings/rtc/twl-rtc.txt new file mode 100644 index 000000000000..596e0c97be7a --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/twl-rtc.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | * TI twl RTC | ||
2 | |||
3 | The TWL family (twl4030/6030) contains a RTC. | ||
4 | |||
5 | Required properties: | ||
6 | - compatible : Should be "ti,twl4030-rtc" | ||
7 | |||
8 | Examples: | ||
9 | |||
10 | rtc@0 { | ||
11 | compatible = "ti,twl4030-rtc"; | ||
12 | }; | ||
diff --git a/Documentation/devicetree/bindings/serial/omap_serial.txt b/Documentation/devicetree/bindings/serial/omap_serial.txt new file mode 100644 index 000000000000..342eedd10050 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/omap_serial.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | OMAP UART controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : should be "ti,omap2-uart" for OMAP2 controllers | ||
5 | - compatible : should be "ti,omap3-uart" for OMAP3 controllers | ||
6 | - compatible : should be "ti,omap4-uart" for OMAP4 controllers | ||
7 | - ti,hwmods : Must be "uart<n>", n being the instance number (1-based) | ||
8 | |||
9 | Optional properties: | ||
10 | - clock-frequency : frequency of the clock input to the UART | ||
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.txt b/Documentation/devicetree/bindings/serial/samsung_uart.txt new file mode 100644 index 000000000000..2c8a17cf5cb5 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/samsung_uart.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | * Samsung's UART Controller | ||
2 | |||
3 | Samsung's UART controller is used for interfacing the SoC with serial communication | ||
4 | devices. | ||
5 | |||
6 | Required properties: | ||
7 | - compatible: should be | ||
8 | - "samsung,exynos4210-uart", for UART's compatible with Exynos4210 uart ports. | ||
9 | |||
10 | - reg: base physical address of the controller and length of memory mapped | ||
11 | region. | ||
12 | |||
13 | - interrupts: interrupt number to the cpu. The interrupt specifier format depends | ||
14 | on the interrupt controller parent. | ||
diff --git a/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt b/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt new file mode 100644 index 000000000000..d5b0da8bf1d8 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tegra-audio-wm8903.txt | |||
@@ -0,0 +1,71 @@ | |||
1 | NVIDIA Tegra audio complex | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : "nvidia,tegra-audio-wm8903" | ||
5 | - nvidia,model : The user-visible name of this sound complex. | ||
6 | - nvidia,audio-routing : A list of the connections between audio components. | ||
7 | Each entry is a pair of strings, the first being the connection's sink, | ||
8 | the second being the connection's source. Valid names for sources and | ||
9 | sinks are the WM8903's pins, and the jacks on the board: | ||
10 | |||
11 | WM8903 pins: | ||
12 | |||
13 | * IN1L | ||
14 | * IN1R | ||
15 | * IN2L | ||
16 | * IN2R | ||
17 | * IN3L | ||
18 | * IN3R | ||
19 | * DMICDAT | ||
20 | * HPOUTL | ||
21 | * HPOUTR | ||
22 | * LINEOUTL | ||
23 | * LINEOUTR | ||
24 | * LOP | ||
25 | * LON | ||
26 | * ROP | ||
27 | * RON | ||
28 | * MICBIAS | ||
29 | |||
30 | Board connectors: | ||
31 | |||
32 | * Headphone Jack | ||
33 | * Int Spk | ||
34 | * Mic Jack | ||
35 | |||
36 | - nvidia,i2s-controller : The phandle of the Tegra I2S1 controller | ||
37 | - nvidia,audio-codec : The phandle of the WM8903 audio codec | ||
38 | |||
39 | Optional properties: | ||
40 | - nvidia,spkr-en-gpios : The GPIO that enables the speakers | ||
41 | - nvidia,hp-mute-gpios : The GPIO that mutes the headphones | ||
42 | - nvidia,hp-det-gpios : The GPIO that detects headphones are plugged in | ||
43 | - nvidia,int-mic-en-gpios : The GPIO that enables the internal microphone | ||
44 | - nvidia,ext-mic-en-gpios : The GPIO that enables the external microphone | ||
45 | |||
46 | Example: | ||
47 | |||
48 | sound { | ||
49 | compatible = "nvidia,tegra-audio-wm8903-harmony", | ||
50 | "nvidia,tegra-audio-wm8903"; | ||
51 | nvidia,model = "tegra-wm8903-harmony"; | ||
52 | |||
53 | nvidia,audio-routing = | ||
54 | "Headphone Jack", "HPOUTR", | ||
55 | "Headphone Jack", "HPOUTL", | ||
56 | "Int Spk", "ROP", | ||
57 | "Int Spk", "RON", | ||
58 | "Int Spk", "LOP", | ||
59 | "Int Spk", "LON", | ||
60 | "Mic Jack", "MICBIAS", | ||
61 | "IN1L", "Mic Jack"; | ||
62 | |||
63 | nvidia,i2s-controller = <&i2s1>; | ||
64 | nvidia,audio-codec = <&wm8903>; | ||
65 | |||
66 | nvidia,spkr-en-gpios = <&codec 2 0>; | ||
67 | nvidia,hp-det-gpios = <&gpio 178 0>; /* gpio PW2 */ | ||
68 | nvidia,int-mic-en-gpios = <&gpio 184 0>; /*gpio PX0 */ | ||
69 | nvidia,ext-mic-en-gpios = <&gpio 185 0>; /* gpio PX1 */ | ||
70 | }; | ||
71 | |||
diff --git a/Documentation/devicetree/bindings/sound/tegra20-das.txt b/Documentation/devicetree/bindings/sound/tegra20-das.txt new file mode 100644 index 000000000000..6de3a7ee4efb --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tegra20-das.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | NVIDIA Tegra 20 DAS (Digital Audio Switch) controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : "nvidia,tegra20-das" | ||
5 | - reg : Should contain DAS registers location and length | ||
6 | |||
7 | Example: | ||
8 | |||
9 | das@70000c00 { | ||
10 | compatible = "nvidia,tegra20-das"; | ||
11 | reg = <0x70000c00 0x80>; | ||
12 | }; | ||
diff --git a/Documentation/devicetree/bindings/sound/tegra20-i2s.txt b/Documentation/devicetree/bindings/sound/tegra20-i2s.txt new file mode 100644 index 000000000000..0df2b5c816e3 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tegra20-i2s.txt | |||
@@ -0,0 +1,17 @@ | |||
1 | NVIDIA Tegra 20 I2S controller | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : "nvidia,tegra20-i2s" | ||
5 | - reg : Should contain I2S registers location and length | ||
6 | - interrupts : Should contain I2S interrupt | ||
7 | - nvidia,dma-request-selector : The Tegra DMA controller's phandle and | ||
8 | request selector for this I2S controller | ||
9 | |||
10 | Example: | ||
11 | |||
12 | i2s@70002800 { | ||
13 | compatible = "nvidia,tegra20-i2s"; | ||
14 | reg = <0x70002800 0x200>; | ||
15 | interrupts = < 45 >; | ||
16 | nvidia,dma-request-selector = < &apbdma 2 >; | ||
17 | }; | ||
diff --git a/Documentation/devicetree/bindings/sound/wm8903.txt b/Documentation/devicetree/bindings/sound/wm8903.txt new file mode 100644 index 000000000000..f102cbc42694 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/wm8903.txt | |||
@@ -0,0 +1,50 @@ | |||
1 | WM8903 audio CODEC | ||
2 | |||
3 | This device supports I2C only. | ||
4 | |||
5 | Required properties: | ||
6 | |||
7 | - compatible : "wlf,wm8903" | ||
8 | |||
9 | - reg : the I2C address of the device. | ||
10 | |||
11 | - gpio-controller : Indicates this device is a GPIO controller. | ||
12 | |||
13 | - #gpio-cells : Should be two. The first cell is the pin number and the | ||
14 | second cell is used to specify optional parameters (currently unused). | ||
15 | |||
16 | Optional properties: | ||
17 | |||
18 | - interrupts : The interrupt line the codec is connected to. | ||
19 | |||
20 | - micdet-cfg : Default register value for R6 (Mic Bias). If absent, the | ||
21 | default is 0. | ||
22 | |||
23 | - micdet-delay : The debounce delay for microphone detection in mS. If | ||
24 | absent, the default is 100. | ||
25 | |||
26 | - gpio-cfg : A list of GPIO configuration register values. The list must | ||
27 | be 5 entries long. If absent, no configuration of these registers is | ||
28 | performed. If any entry has the value 0xffffffff, that GPIO's | ||
29 | configuration will not be modified. | ||
30 | |||
31 | Example: | ||
32 | |||
33 | codec: wm8903@1a { | ||
34 | compatible = "wlf,wm8903"; | ||
35 | reg = <0x1a>; | ||
36 | interrupts = < 347 >; | ||
37 | |||
38 | gpio-controller; | ||
39 | #gpio-cells = <2>; | ||
40 | |||
41 | micdet-cfg = <0>; | ||
42 | micdet-delay = <100>; | ||
43 | gpio-cfg = < | ||
44 | 0x0600 /* DMIC_LR, output */ | ||
45 | 0x0680 /* DMIC_DAT, input */ | ||
46 | 0x0000 /* GPIO, output, low */ | ||
47 | 0x0200 /* Interrupt, output */ | ||
48 | 0x01a0 /* BCLK, input, active high */ | ||
49 | >; | ||
50 | }; | ||
diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt new file mode 100644 index 000000000000..7a7eb1e7bda6 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/wm8994.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | WM1811/WM8994/WM8958 audio CODEC | ||
2 | |||
3 | These devices support both I2C and SPI (configured with pin strapping | ||
4 | on the board). | ||
5 | |||
6 | Required properties: | ||
7 | |||
8 | - compatible : "wlf,wm1811", "wlf,wm8994", "wlf,wm8958" | ||
9 | |||
10 | - reg : the I2C address of the device for I2C, the chip select | ||
11 | number for SPI. | ||
12 | |||
13 | Example: | ||
14 | |||
15 | codec: wm8994@1a { | ||
16 | compatible = "wlf,wm8994"; | ||
17 | reg = <0x1a>; | ||
18 | }; | ||
diff --git a/Documentation/devicetree/bindings/usb/tegra-usb.txt b/Documentation/devicetree/bindings/usb/tegra-usb.txt new file mode 100644 index 000000000000..035d63d5646d --- /dev/null +++ b/Documentation/devicetree/bindings/usb/tegra-usb.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | Tegra SOC USB controllers | ||
2 | |||
3 | The device node for a USB controller that is part of a Tegra | ||
4 | SOC is as described in the document "Open Firmware Recommended | ||
5 | Practice : Universal Serial Bus" with the following modifications | ||
6 | and additions : | ||
7 | |||
8 | Required properties : | ||
9 | - compatible : Should be "nvidia,tegra20-ehci" for USB controllers | ||
10 | used in host mode. | ||
11 | - phy_type : Should be one of "ulpi" or "utmi". | ||
12 | - nvidia,vbus-gpio : If present, specifies a gpio that needs to be | ||
13 | activated for the bus to be powered. | ||
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 874921e97802..ecc6a6cd26c1 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt | |||
@@ -8,7 +8,9 @@ amcc Applied Micro Circuits Corporation (APM, formally AMCC) | |||
8 | apm Applied Micro Circuits Corporation (APM) | 8 | apm Applied Micro Circuits Corporation (APM) |
9 | arm ARM Ltd. | 9 | arm ARM Ltd. |
10 | atmel Atmel Corporation | 10 | atmel Atmel Corporation |
11 | cavium Cavium, Inc. | ||
11 | chrp Common Hardware Reference Platform | 12 | chrp Common Hardware Reference Platform |
13 | cortina Cortina Systems, Inc. | ||
12 | dallas Maxim Integrated Products (formerly Dallas Semiconductor) | 14 | dallas Maxim Integrated Products (formerly Dallas Semiconductor) |
13 | denx Denx Software Engineering | 15 | denx Denx Software Engineering |
14 | epson Seiko Epson Corp. | 16 | epson Seiko Epson Corp. |
@@ -32,10 +34,13 @@ powervr Imagination Technologies | |||
32 | qcom Qualcomm, Inc. | 34 | qcom Qualcomm, Inc. |
33 | ramtron Ramtron International | 35 | ramtron Ramtron International |
34 | samsung Samsung Semiconductor | 36 | samsung Samsung Semiconductor |
37 | sbs Smart Battery System | ||
35 | schindler Schindler | 38 | schindler Schindler |
36 | sil Silicon Image | 39 | sil Silicon Image |
37 | simtek | 40 | simtek |
38 | sirf SiRF Technology, Inc. | 41 | sirf SiRF Technology, Inc. |
42 | st STMicroelectronics | ||
39 | stericsson ST-Ericsson | 43 | stericsson ST-Ericsson |
40 | ti Texas Instruments | 44 | ti Texas Instruments |
45 | wlf Wolfson Microelectronics | ||
41 | xlnx Xilinx | 46 | xlnx Xilinx |
diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt new file mode 100644 index 000000000000..3f682889068b --- /dev/null +++ b/Documentation/digsig.txt | |||
@@ -0,0 +1,96 @@ | |||
1 | Digital Signature Verification API | ||
2 | |||
3 | CONTENTS | ||
4 | |||
5 | 1. Introduction | ||
6 | 2. API | ||
7 | 3. User-space utilities | ||
8 | |||
9 | |||
10 | 1. Introduction | ||
11 | |||
12 | Digital signature verification API provides a method to verify digital signature. | ||
13 | Currently digital signatures are used by the IMA/EVM integrity protection subsystem. | ||
14 | |||
15 | Digital signature verification is implemented using cut-down kernel port of | ||
16 | GnuPG multi-precision integers (MPI) library. The kernel port provides | ||
17 | memory allocation errors handling, has been refactored according to kernel | ||
18 | coding style, and checkpatch.pl reported errors and warnings have been fixed. | ||
19 | |||
20 | Public key and signature consist of header and MPIs. | ||
21 | |||
22 | struct pubkey_hdr { | ||
23 | uint8_t version; /* key format version */ | ||
24 | time_t timestamp; /* key made, always 0 for now */ | ||
25 | uint8_t algo; | ||
26 | uint8_t nmpi; | ||
27 | char mpi[0]; | ||
28 | } __packed; | ||
29 | |||
30 | struct signature_hdr { | ||
31 | uint8_t version; /* signature format version */ | ||
32 | time_t timestamp; /* signature made */ | ||
33 | uint8_t algo; | ||
34 | uint8_t hash; | ||
35 | uint8_t keyid[8]; | ||
36 | uint8_t nmpi; | ||
37 | char mpi[0]; | ||
38 | } __packed; | ||
39 | |||
40 | keyid equals to SHA1[12-19] over the total key content. | ||
41 | Signature header is used as an input to generate a signature. | ||
42 | Such an approach ensures that key or signature header could not be changed. | ||
43 | It protects timestamp from being changed and can be used for rollback | ||
44 | protection. | ||
45 | |||
46 | 2. API | ||
47 | |||
48 | API currently includes only 1 function: | ||
49 | |||
50 | digsig_verify() - digital signature verification with public key | ||
51 | |||
52 | |||
53 | /** | ||
54 | * digsig_verify() - digital signature verification with public key | ||
55 | * @keyring: keyring to search key in | ||
56 | * @sig: digital signature | ||
57 | * @siglen: length of the signature | ||
58 | * @data: data | ||
59 | * @datalen: length of the data | ||
60 | * @return: 0 on success, -EINVAL otherwise | ||
61 | * | ||
62 | * Verifies data integrity against digital signature. | ||
63 | * Currently only RSA is supported. | ||
64 | * Normally hash of the content is used as a data for this function. | ||
65 | * | ||
66 | */ | ||
67 | int digsig_verify(struct key *keyring, const char *sig, int siglen, | ||
68 | const char *data, int datalen); | ||
69 | |||
70 | 3. User-space utilities | ||
71 | |||
72 | The signing and key management utilities evm-utils provide functionality | ||
73 | to generate signatures, to load keys into the kernel keyring. | ||
74 | Keys can be in PEM or converted to the kernel format. | ||
75 | When the key is added to the kernel keyring, the keyid defines the name | ||
76 | of the key: 5D2B05FC633EE3E8 in the example below. | ||
77 | |||
78 | Here is example output of the keyctl utility. | ||
79 | |||
80 | $ keyctl show | ||
81 | Session Keyring | ||
82 | -3 --alswrv 0 0 keyring: _ses | ||
83 | 603976250 --alswrv 0 -1 \_ keyring: _uid.0 | ||
84 | 817777377 --alswrv 0 0 \_ user: kmk | ||
85 | 891974900 --alswrv 0 0 \_ encrypted: evm-key | ||
86 | 170323636 --alswrv 0 0 \_ keyring: _module | ||
87 | 548221616 --alswrv 0 0 \_ keyring: _ima | ||
88 | 128198054 --alswrv 0 0 \_ keyring: _evm | ||
89 | |||
90 | $ keyctl list 128198054 | ||
91 | 1 key in keyring: | ||
92 | 620789745: --alswrv 0 0 user: 5D2B05FC633EE3E8 | ||
93 | |||
94 | |||
95 | Dmitry Kasatkin | ||
96 | 06.10.2011 | ||
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt new file mode 100644 index 000000000000..510eab32f392 --- /dev/null +++ b/Documentation/dma-buf-sharing.txt | |||
@@ -0,0 +1,224 @@ | |||
1 | DMA Buffer Sharing API Guide | ||
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
3 | |||
4 | Sumit Semwal | ||
5 | <sumit dot semwal at linaro dot org> | ||
6 | <sumit dot semwal at ti dot com> | ||
7 | |||
8 | This document serves as a guide to device-driver writers on what is the dma-buf | ||
9 | buffer sharing API, how to use it for exporting and using shared buffers. | ||
10 | |||
11 | Any device driver which wishes to be a part of DMA buffer sharing, can do so as | ||
12 | either the 'exporter' of buffers, or the 'user' of buffers. | ||
13 | |||
14 | Say a driver A wants to use buffers created by driver B, then we call B as the | ||
15 | exporter, and A as buffer-user. | ||
16 | |||
17 | The exporter | ||
18 | - implements and manages operations[1] for the buffer | ||
19 | - allows other users to share the buffer by using dma_buf sharing APIs, | ||
20 | - manages the details of buffer allocation, | ||
21 | - decides about the actual backing storage where this allocation happens, | ||
22 | - takes care of any migration of scatterlist - for all (shared) users of this | ||
23 | buffer, | ||
24 | |||
25 | The buffer-user | ||
26 | - is one of (many) sharing users of the buffer. | ||
27 | - doesn't need to worry about how the buffer is allocated, or where. | ||
28 | - needs a mechanism to get access to the scatterlist that makes up this buffer | ||
29 | in memory, mapped into its own address space, so it can access the same area | ||
30 | of memory. | ||
31 | |||
32 | *IMPORTANT*: [see https://lkml.org/lkml/2011/12/20/211 for more details] | ||
33 | For this first version, a buffer shared using the dma_buf sharing API: | ||
34 | - *may* be exported to user space using "mmap" *ONLY* by exporter, outside of | ||
35 | this framework. | ||
36 | - may be used *ONLY* by importers that do not need CPU access to the buffer. | ||
37 | |||
38 | The dma_buf buffer sharing API usage contains the following steps: | ||
39 | |||
40 | 1. Exporter announces that it wishes to export a buffer | ||
41 | 2. Userspace gets the file descriptor associated with the exported buffer, and | ||
42 | passes it around to potential buffer-users based on use case | ||
43 | 3. Each buffer-user 'connects' itself to the buffer | ||
44 | 4. When needed, buffer-user requests access to the buffer from exporter | ||
45 | 5. When finished with its use, the buffer-user notifies end-of-DMA to exporter | ||
46 | 6. When buffer-user is done using this buffer completely, it 'disconnects' | ||
47 | itself from the buffer. | ||
48 | |||
49 | |||
50 | 1. Exporter's announcement of buffer export | ||
51 | |||
52 | The buffer exporter announces its wish to export a buffer. In this, it | ||
53 | connects its own private buffer data, provides implementation for operations | ||
54 | that can be performed on the exported dma_buf, and flags for the file | ||
55 | associated with this buffer. | ||
56 | |||
57 | Interface: | ||
58 | struct dma_buf *dma_buf_export(void *priv, struct dma_buf_ops *ops, | ||
59 | size_t size, int flags) | ||
60 | |||
61 | If this succeeds, dma_buf_export allocates a dma_buf structure, and returns a | ||
62 | pointer to the same. It also associates an anonymous file with this buffer, | ||
63 | so it can be exported. On failure to allocate the dma_buf object, it returns | ||
64 | NULL. | ||
65 | |||
66 | 2. Userspace gets a handle to pass around to potential buffer-users | ||
67 | |||
68 | Userspace entity requests for a file-descriptor (fd) which is a handle to the | ||
69 | anonymous file associated with the buffer. It can then share the fd with other | ||
70 | drivers and/or processes. | ||
71 | |||
72 | Interface: | ||
73 | int dma_buf_fd(struct dma_buf *dmabuf) | ||
74 | |||
75 | This API installs an fd for the anonymous file associated with this buffer; | ||
76 | returns either 'fd', or error. | ||
77 | |||
78 | 3. Each buffer-user 'connects' itself to the buffer | ||
79 | |||
80 | Each buffer-user now gets a reference to the buffer, using the fd passed to | ||
81 | it. | ||
82 | |||
83 | Interface: | ||
84 | struct dma_buf *dma_buf_get(int fd) | ||
85 | |||
86 | This API will return a reference to the dma_buf, and increment refcount for | ||
87 | it. | ||
88 | |||
89 | After this, the buffer-user needs to attach its device with the buffer, which | ||
90 | helps the exporter to know of device buffer constraints. | ||
91 | |||
92 | Interface: | ||
93 | struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, | ||
94 | struct device *dev) | ||
95 | |||
96 | This API returns reference to an attachment structure, which is then used | ||
97 | for scatterlist operations. It will optionally call the 'attach' dma_buf | ||
98 | operation, if provided by the exporter. | ||
99 | |||
100 | The dma-buf sharing framework does the bookkeeping bits related to managing | ||
101 | the list of all attachments to a buffer. | ||
102 | |||
103 | Until this stage, the buffer-exporter has the option to choose not to actually | ||
104 | allocate the backing storage for this buffer, but wait for the first buffer-user | ||
105 | to request use of buffer for allocation. | ||
106 | |||
107 | |||
108 | 4. When needed, buffer-user requests access to the buffer | ||
109 | |||
110 | Whenever a buffer-user wants to use the buffer for any DMA, it asks for | ||
111 | access to the buffer using dma_buf_map_attachment API. At least one attach to | ||
112 | the buffer must have happened before map_dma_buf can be called. | ||
113 | |||
114 | Interface: | ||
115 | struct sg_table * dma_buf_map_attachment(struct dma_buf_attachment *, | ||
116 | enum dma_data_direction); | ||
117 | |||
118 | This is a wrapper to dma_buf->ops->map_dma_buf operation, which hides the | ||
119 | "dma_buf->ops->" indirection from the users of this interface. | ||
120 | |||
121 | In struct dma_buf_ops, map_dma_buf is defined as | ||
122 | struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *, | ||
123 | enum dma_data_direction); | ||
124 | |||
125 | It is one of the buffer operations that must be implemented by the exporter. | ||
126 | It should return the sg_table containing scatterlist for this buffer, mapped | ||
127 | into caller's address space. | ||
128 | |||
129 | If this is being called for the first time, the exporter can now choose to | ||
130 | scan through the list of attachments for this buffer, collate the requirements | ||
131 | of the attached devices, and choose an appropriate backing storage for the | ||
132 | buffer. | ||
133 | |||
134 | Based on enum dma_data_direction, it might be possible to have multiple users | ||
135 | accessing at the same time (for reading, maybe), or any other kind of sharing | ||
136 | that the exporter might wish to make available to buffer-users. | ||
137 | |||
138 | map_dma_buf() operation can return -EINTR if it is interrupted by a signal. | ||
139 | |||
140 | |||
141 | 5. When finished, the buffer-user notifies end-of-DMA to exporter | ||
142 | |||
143 | Once the DMA for the current buffer-user is over, it signals 'end-of-DMA' to | ||
144 | the exporter using the dma_buf_unmap_attachment API. | ||
145 | |||
146 | Interface: | ||
147 | void dma_buf_unmap_attachment(struct dma_buf_attachment *, | ||
148 | struct sg_table *); | ||
149 | |||
150 | This is a wrapper to dma_buf->ops->unmap_dma_buf() operation, which hides the | ||
151 | "dma_buf->ops->" indirection from the users of this interface. | ||
152 | |||
153 | In struct dma_buf_ops, unmap_dma_buf is defined as | ||
154 | void (*unmap_dma_buf)(struct dma_buf_attachment *, struct sg_table *); | ||
155 | |||
156 | unmap_dma_buf signifies the end-of-DMA for the attachment provided. Like | ||
157 | map_dma_buf, this API also must be implemented by the exporter. | ||
158 | |||
159 | |||
160 | 6. When buffer-user is done using this buffer, it 'disconnects' itself from the | ||
161 | buffer. | ||
162 | |||
163 | After the buffer-user has no more interest in using this buffer, it should | ||
164 | disconnect itself from the buffer: | ||
165 | |||
166 | - it first detaches itself from the buffer. | ||
167 | |||
168 | Interface: | ||
169 | void dma_buf_detach(struct dma_buf *dmabuf, | ||
170 | struct dma_buf_attachment *dmabuf_attach); | ||
171 | |||
172 | This API removes the attachment from the list in dmabuf, and optionally calls | ||
173 | dma_buf->ops->detach(), if provided by exporter, for any housekeeping bits. | ||
174 | |||
175 | - Then, the buffer-user returns the buffer reference to exporter. | ||
176 | |||
177 | Interface: | ||
178 | void dma_buf_put(struct dma_buf *dmabuf); | ||
179 | |||
180 | This API then reduces the refcount for this buffer. | ||
181 | |||
182 | If, as a result of this call, the refcount becomes 0, the 'release' file | ||
183 | operation related to this fd is called. It calls the dmabuf->ops->release() | ||
184 | operation in turn, and frees the memory allocated for dmabuf when exported. | ||
185 | |||
186 | NOTES: | ||
187 | - Importance of attach-detach and {map,unmap}_dma_buf operation pairs | ||
188 | The attach-detach calls allow the exporter to figure out backing-storage | ||
189 | constraints for the currently-interested devices. This allows preferential | ||
190 | allocation, and/or migration of pages across different types of storage | ||
191 | available, if possible. | ||
192 | |||
193 | Bracketing of DMA access with {map,unmap}_dma_buf operations is essential | ||
194 | to allow just-in-time backing of storage, and migration mid-way through a | ||
195 | use-case. | ||
196 | |||
197 | - Migration of backing storage if needed | ||
198 | If after | ||
199 | - at least one map_dma_buf has happened, | ||
200 | - and the backing storage has been allocated for this buffer, | ||
201 | another new buffer-user intends to attach itself to this buffer, it might | ||
202 | be allowed, if possible for the exporter. | ||
203 | |||
204 | In case it is allowed by the exporter: | ||
205 | if the new buffer-user has stricter 'backing-storage constraints', and the | ||
206 | exporter can handle these constraints, the exporter can just stall on the | ||
207 | map_dma_buf until all outstanding access is completed (as signalled by | ||
208 | unmap_dma_buf). | ||
209 | Once all users have finished accessing and have unmapped this buffer, the | ||
210 | exporter could potentially move the buffer to the stricter backing-storage, | ||
211 | and then allow further {map,unmap}_dma_buf operations from any buffer-user | ||
212 | from the migrated backing-storage. | ||
213 | |||
214 | If the exporter cannot fulfil the backing-storage constraints of the new | ||
215 | buffer-user device as requested, dma_buf_attach() would return an error to | ||
216 | denote non-compatibility of the new buffer-sharing request with the current | ||
217 | buffer. | ||
218 | |||
219 | If the exporter chooses not to allow an attach() operation once a | ||
220 | map_dma_buf() API has been called, it simply returns an error. | ||
221 | |||
222 | References: | ||
223 | [1] struct dma_buf_ops in include/linux/dma-buf.h | ||
224 | [2] All interfaces mentioned above defined in include/linux/dma-buf.h | ||
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index dfa6fc6e4b28..0c083c5c2faa 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -66,7 +66,6 @@ GRTAGS | |||
66 | GSYMS | 66 | GSYMS |
67 | GTAGS | 67 | GTAGS |
68 | Image | 68 | Image |
69 | Kerntypes | ||
70 | Module.markers | 69 | Module.markers |
71 | Module.symvers | 70 | Module.symvers |
72 | PENDING | 71 | PENDING |
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index d79aead9418b..10c64c8a13d4 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt | |||
@@ -262,6 +262,7 @@ IOMAP | |||
262 | devm_ioremap() | 262 | devm_ioremap() |
263 | devm_ioremap_nocache() | 263 | devm_ioremap_nocache() |
264 | devm_iounmap() | 264 | devm_iounmap() |
265 | devm_request_and_ioremap() : checks resource, requests region, ioremaps | ||
265 | pcim_iomap() | 266 | pcim_iomap() |
266 | pcim_iounmap() | 267 | pcim_iounmap() |
267 | pcim_iomap_table() : array of mapped addresses indexed by BAR | 268 | pcim_iomap_table() : array of mapped addresses indexed by BAR |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 3d849122b5b1..d49c2ec72d12 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -85,17 +85,6 @@ Who: Robin Getz <rgetz@blackfin.uclinux.org> & Matt Mackall <mpm@selenic.com> | |||
85 | 85 | ||
86 | --------------------------- | 86 | --------------------------- |
87 | 87 | ||
88 | What: Deprecated snapshot ioctls | ||
89 | When: 2.6.36 | ||
90 | |||
91 | Why: The ioctls in kernel/power/user.c were marked as deprecated long time | ||
92 | ago. Now they notify users about that so that they need to replace | ||
93 | their userspace. After some more time, remove them completely. | ||
94 | |||
95 | Who: Jiri Slaby <jirislaby@gmail.com> | ||
96 | |||
97 | --------------------------- | ||
98 | |||
99 | What: The ieee80211_regdom module parameter | 88 | What: The ieee80211_regdom module parameter |
100 | When: March 2010 / desktop catchup | 89 | When: March 2010 / desktop catchup |
101 | 90 | ||
@@ -263,8 +252,7 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org> | |||
263 | 252 | ||
264 | What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS | 253 | What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS |
265 | (in net/core/net-sysfs.c) | 254 | (in net/core/net-sysfs.c) |
266 | When: After the only user (hal) has seen a release with the patches | 255 | When: 3.5 |
267 | for enough time, probably some time in 2010. | ||
268 | Why: Over 1K .text/.data size reduction, data is available in other | 256 | Why: Over 1K .text/.data size reduction, data is available in other |
269 | ways (ioctls) | 257 | ways (ioctls) |
270 | Who: Johannes Berg <johannes@sipsolutions.net> | 258 | Who: Johannes Berg <johannes@sipsolutions.net> |
@@ -362,15 +350,6 @@ Who: anybody or Florian Mickler <florian@mickler.org> | |||
362 | 350 | ||
363 | ---------------------------- | 351 | ---------------------------- |
364 | 352 | ||
365 | What: KVM paravirt mmu host support | ||
366 | When: January 2011 | ||
367 | Why: The paravirt mmu host support is slower than non-paravirt mmu, both | ||
368 | on newer and older hardware. It is already not exposed to the guest, | ||
369 | and kept only for live migration purposes. | ||
370 | Who: Avi Kivity <avi@redhat.com> | ||
371 | |||
372 | ---------------------------- | ||
373 | |||
374 | What: iwlwifi 50XX module parameters | 353 | What: iwlwifi 50XX module parameters |
375 | When: 3.0 | 354 | When: 3.0 |
376 | Why: The "..50" modules parameters were used to configure 5000 series and | 355 | Why: The "..50" modules parameters were used to configure 5000 series and |
@@ -535,6 +514,20 @@ Why: In 3.0, we can now autodetect internal 3G device and already have | |||
535 | information log when acer-wmi initial. | 514 | information log when acer-wmi initial. |
536 | Who: Lee, Chun-Yi <jlee@novell.com> | 515 | Who: Lee, Chun-Yi <jlee@novell.com> |
537 | 516 | ||
517 | --------------------------- | ||
518 | |||
519 | What: /sys/devices/platform/_UDC_/udc/_UDC_/is_dualspeed file and | ||
520 | is_dualspeed line in /sys/devices/platform/ci13xxx_*/udc/device file. | ||
521 | When: 3.8 | ||
522 | Why: The is_dualspeed file is superseded by maximum_speed in the same | ||
523 | directory and is_dualspeed line in device file is superseded by | ||
524 | max_speed line in the same file. | ||
525 | |||
526 | The maximum_speed/max_speed specifies maximum speed supported by UDC. | ||
527 | To check if dualspeed is supported, check if the value is >= 3. | ||
528 | Various possible speeds are defined in <linux/usb/ch9.h>. | ||
529 | Who: Michal Nazarewicz <mina86@mina86.com> | ||
530 | |||
538 | ---------------------------- | 531 | ---------------------------- |
539 | 532 | ||
540 | What: The XFS nodelaylog mount option | 533 | What: The XFS nodelaylog mount option |
@@ -551,3 +544,15 @@ When: 3.5 | |||
551 | Why: The iwlagn module has been renamed iwlwifi. The alias will be around | 544 | Why: The iwlagn module has been renamed iwlwifi. The alias will be around |
552 | for backward compatibility for several cycles and then dropped. | 545 | for backward compatibility for several cycles and then dropped. |
553 | Who: Don Fry <donald.h.fry@intel.com> | 546 | Who: Don Fry <donald.h.fry@intel.com> |
547 | |||
548 | ---------------------------- | ||
549 | |||
550 | What: pci_scan_bus_parented() | ||
551 | When: 3.5 | ||
552 | Why: The pci_scan_bus_parented() interface creates a new root bus. The | ||
553 | bus is created with default resources (ioport_resource and | ||
554 | iomem_resource) that are always wrong, so we rely on arch code to | ||
555 | correct them later. Callers of pci_scan_bus_parented() should | ||
556 | convert to using pci_scan_root_bus() so they can supply a list of | ||
557 | bus resources when the bus is created. | ||
558 | Who: Bjorn Helgaas <bhelgaas@google.com> | ||
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d819ba16a0c7..4fca82e5276e 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -37,15 +37,15 @@ d_manage: no no yes (ref-walk) maybe | |||
37 | 37 | ||
38 | --------------------------- inode_operations --------------------------- | 38 | --------------------------- inode_operations --------------------------- |
39 | prototypes: | 39 | prototypes: |
40 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); | 40 | int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *); |
41 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid | 41 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid |
42 | ata *); | 42 | ata *); |
43 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 43 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
44 | int (*unlink) (struct inode *,struct dentry *); | 44 | int (*unlink) (struct inode *,struct dentry *); |
45 | int (*symlink) (struct inode *,struct dentry *,const char *); | 45 | int (*symlink) (struct inode *,struct dentry *,const char *); |
46 | int (*mkdir) (struct inode *,struct dentry *,int); | 46 | int (*mkdir) (struct inode *,struct dentry *,umode_t); |
47 | int (*rmdir) (struct inode *,struct dentry *); | 47 | int (*rmdir) (struct inode *,struct dentry *); |
48 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); | 48 | int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); |
49 | int (*rename) (struct inode *, struct dentry *, | 49 | int (*rename) (struct inode *, struct dentry *, |
50 | struct inode *, struct dentry *); | 50 | struct inode *, struct dentry *); |
51 | int (*readlink) (struct dentry *, char __user *,int); | 51 | int (*readlink) (struct dentry *, char __user *,int); |
@@ -117,7 +117,7 @@ prototypes: | |||
117 | int (*statfs) (struct dentry *, struct kstatfs *); | 117 | int (*statfs) (struct dentry *, struct kstatfs *); |
118 | int (*remount_fs) (struct super_block *, int *, char *); | 118 | int (*remount_fs) (struct super_block *, int *, char *); |
119 | void (*umount_begin) (struct super_block *); | 119 | void (*umount_begin) (struct super_block *); |
120 | int (*show_options)(struct seq_file *, struct vfsmount *); | 120 | int (*show_options)(struct seq_file *, struct dentry *); |
121 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 121 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
122 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 122 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
123 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); | 123 | int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); |
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index dd57bb6bb390..b40fec9d3f53 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt | |||
@@ -192,7 +192,7 @@ attribute value uses the store_attribute() method. | |||
192 | struct configfs_attribute { | 192 | struct configfs_attribute { |
193 | char *ca_name; | 193 | char *ca_name; |
194 | struct module *ca_owner; | 194 | struct module *ca_owner; |
195 | mode_t ca_mode; | 195 | umode_t ca_mode; |
196 | }; | 196 | }; |
197 | 197 | ||
198 | When a config_item wants an attribute to appear as a file in the item's | 198 | When a config_item wants an attribute to appear as a file in the item's |
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 742cc06e138f..6872c91bce35 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt | |||
@@ -35,7 +35,7 @@ described below will work. | |||
35 | 35 | ||
36 | The most general way to create a file within a debugfs directory is with: | 36 | The most general way to create a file within a debugfs directory is with: |
37 | 37 | ||
38 | struct dentry *debugfs_create_file(const char *name, mode_t mode, | 38 | struct dentry *debugfs_create_file(const char *name, umode_t mode, |
39 | struct dentry *parent, void *data, | 39 | struct dentry *parent, void *data, |
40 | const struct file_operations *fops); | 40 | const struct file_operations *fops); |
41 | 41 | ||
@@ -53,13 +53,13 @@ actually necessary; the debugfs code provides a number of helper functions | |||
53 | for simple situations. Files containing a single integer value can be | 53 | for simple situations. Files containing a single integer value can be |
54 | created with any of: | 54 | created with any of: |
55 | 55 | ||
56 | struct dentry *debugfs_create_u8(const char *name, mode_t mode, | 56 | struct dentry *debugfs_create_u8(const char *name, umode_t mode, |
57 | struct dentry *parent, u8 *value); | 57 | struct dentry *parent, u8 *value); |
58 | struct dentry *debugfs_create_u16(const char *name, mode_t mode, | 58 | struct dentry *debugfs_create_u16(const char *name, umode_t mode, |
59 | struct dentry *parent, u16 *value); | 59 | struct dentry *parent, u16 *value); |
60 | struct dentry *debugfs_create_u32(const char *name, mode_t mode, | 60 | struct dentry *debugfs_create_u32(const char *name, umode_t mode, |
61 | struct dentry *parent, u32 *value); | 61 | struct dentry *parent, u32 *value); |
62 | struct dentry *debugfs_create_u64(const char *name, mode_t mode, | 62 | struct dentry *debugfs_create_u64(const char *name, umode_t mode, |
63 | struct dentry *parent, u64 *value); | 63 | struct dentry *parent, u64 *value); |
64 | 64 | ||
65 | These files support both reading and writing the given value; if a specific | 65 | These files support both reading and writing the given value; if a specific |
@@ -67,13 +67,13 @@ file should not be written to, simply set the mode bits accordingly. The | |||
67 | values in these files are in decimal; if hexadecimal is more appropriate, | 67 | values in these files are in decimal; if hexadecimal is more appropriate, |
68 | the following functions can be used instead: | 68 | the following functions can be used instead: |
69 | 69 | ||
70 | struct dentry *debugfs_create_x8(const char *name, mode_t mode, | 70 | struct dentry *debugfs_create_x8(const char *name, umode_t mode, |
71 | struct dentry *parent, u8 *value); | 71 | struct dentry *parent, u8 *value); |
72 | struct dentry *debugfs_create_x16(const char *name, mode_t mode, | 72 | struct dentry *debugfs_create_x16(const char *name, umode_t mode, |
73 | struct dentry *parent, u16 *value); | 73 | struct dentry *parent, u16 *value); |
74 | struct dentry *debugfs_create_x32(const char *name, mode_t mode, | 74 | struct dentry *debugfs_create_x32(const char *name, umode_t mode, |
75 | struct dentry *parent, u32 *value); | 75 | struct dentry *parent, u32 *value); |
76 | struct dentry *debugfs_create_x64(const char *name, mode_t mode, | 76 | struct dentry *debugfs_create_x64(const char *name, umode_t mode, |
77 | struct dentry *parent, u64 *value); | 77 | struct dentry *parent, u64 *value); |
78 | 78 | ||
79 | These functions are useful as long as the developer knows the size of the | 79 | These functions are useful as long as the developer knows the size of the |
@@ -81,7 +81,7 @@ value to be exported. Some types can have different widths on different | |||
81 | architectures, though, complicating the situation somewhat. There is a | 81 | architectures, though, complicating the situation somewhat. There is a |
82 | function meant to help out in one special case: | 82 | function meant to help out in one special case: |
83 | 83 | ||
84 | struct dentry *debugfs_create_size_t(const char *name, mode_t mode, | 84 | struct dentry *debugfs_create_size_t(const char *name, umode_t mode, |
85 | struct dentry *parent, | 85 | struct dentry *parent, |
86 | size_t *value); | 86 | size_t *value); |
87 | 87 | ||
@@ -90,21 +90,22 @@ a variable of type size_t. | |||
90 | 90 | ||
91 | Boolean values can be placed in debugfs with: | 91 | Boolean values can be placed in debugfs with: |
92 | 92 | ||
93 | struct dentry *debugfs_create_bool(const char *name, mode_t mode, | 93 | struct dentry *debugfs_create_bool(const char *name, umode_t mode, |
94 | struct dentry *parent, u32 *value); | 94 | struct dentry *parent, u32 *value); |
95 | 95 | ||
96 | A read on the resulting file will yield either Y (for non-zero values) or | 96 | A read on the resulting file will yield either Y (for non-zero values) or |
97 | N, followed by a newline. If written to, it will accept either upper- or | 97 | N, followed by a newline. If written to, it will accept either upper- or |
98 | lower-case values, or 1 or 0. Any other input will be silently ignored. | 98 | lower-case values, or 1 or 0. Any other input will be silently ignored. |
99 | 99 | ||
100 | Finally, a block of arbitrary binary data can be exported with: | 100 | Another option is exporting a block of arbitrary binary data, with |
101 | this structure and function: | ||
101 | 102 | ||
102 | struct debugfs_blob_wrapper { | 103 | struct debugfs_blob_wrapper { |
103 | void *data; | 104 | void *data; |
104 | unsigned long size; | 105 | unsigned long size; |
105 | }; | 106 | }; |
106 | 107 | ||
107 | struct dentry *debugfs_create_blob(const char *name, mode_t mode, | 108 | struct dentry *debugfs_create_blob(const char *name, umode_t mode, |
108 | struct dentry *parent, | 109 | struct dentry *parent, |
109 | struct debugfs_blob_wrapper *blob); | 110 | struct debugfs_blob_wrapper *blob); |
110 | 111 | ||
@@ -115,6 +116,35 @@ can be used to export binary information, but there does not appear to be | |||
115 | any code which does so in the mainline. Note that all files created with | 116 | any code which does so in the mainline. Note that all files created with |
116 | debugfs_create_blob() are read-only. | 117 | debugfs_create_blob() are read-only. |
117 | 118 | ||
119 | If you want to dump a block of registers (something that happens quite | ||
120 | often during development, even if little such code reaches mainline), | ||
121 | debugfs offers two functions: one to make a registers-only file, and | ||
122 | another to insert a register block in the middle of another sequential | ||
123 | file. | ||
124 | |||
125 | struct debugfs_reg32 { | ||
126 | char *name; | ||
127 | unsigned long offset; | ||
128 | }; | ||
129 | |||
130 | struct debugfs_regset32 { | ||
131 | struct debugfs_reg32 *regs; | ||
132 | int nregs; | ||
133 | void __iomem *base; | ||
134 | }; | ||
135 | |||
136 | struct dentry *debugfs_create_regset32(const char *name, umode_t mode, | ||
137 | struct dentry *parent, | ||
138 | struct debugfs_regset32 *regset); | ||
139 | |||
140 | int debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, | ||
141 | int nregs, void __iomem *base, char *prefix); | ||
142 | |||
143 | The "base" argument may be 0, but you may want to build the reg32 array | ||
144 | using __stringify, and a number of register names (macros) are actually | ||
145 | byte offsets over a base for the register block. | ||
146 | |||
147 | |||
118 | There are a couple of other directory-oriented helper functions: | 148 | There are a couple of other directory-oriented helper functions: |
119 | 149 | ||
120 | struct dentry *debugfs_rename(struct dentry *old_dir, | 150 | struct dentry *debugfs_rename(struct dentry *old_dir, |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 4917cf24a5e0..10ec4639f152 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls | |||
581 | behaviour may change in the future as it is | 581 | behaviour may change in the future as it is |
582 | not necessary and has been done this way only | 582 | not necessary and has been done this way only |
583 | for sake of simplicity. | 583 | for sake of simplicity. |
584 | |||
585 | EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number | ||
586 | of blocks of resized filesystem is passed in via | ||
587 | 64 bit integer argument. The kernel allocates | ||
588 | bitmaps and inode table, the userspace tool thus | ||
589 | just passes the new number of blocks. | ||
590 | |||
584 | .............................................................................. | 591 | .............................................................................. |
585 | 592 | ||
586 | References | 593 | References |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0ec91f03422e..a76a26a1db8a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -41,6 +41,8 @@ Table of Contents | |||
41 | 3.5 /proc/<pid>/mountinfo - Information about mounts | 41 | 3.5 /proc/<pid>/mountinfo - Information about mounts |
42 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm | 42 | 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm |
43 | 43 | ||
44 | 4 Configuring procfs | ||
45 | 4.1 Mount options | ||
44 | 46 | ||
45 | ------------------------------------------------------------------------------ | 47 | ------------------------------------------------------------------------------ |
46 | Preface | 48 | Preface |
@@ -305,6 +307,9 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) | |||
305 | blkio_ticks time spent waiting for block IO | 307 | blkio_ticks time spent waiting for block IO |
306 | gtime guest time of the task in jiffies | 308 | gtime guest time of the task in jiffies |
307 | cgtime guest time of the task children in jiffies | 309 | cgtime guest time of the task children in jiffies |
310 | start_data address above which program data+bss is placed | ||
311 | end_data address below which program data+bss is placed | ||
312 | start_brk address above which program heap can be expanded with brk() | ||
308 | .............................................................................. | 313 | .............................................................................. |
309 | 314 | ||
310 | The /proc/PID/maps file containing the currently mapped memory regions and | 315 | The /proc/PID/maps file containing the currently mapped memory regions and |
@@ -1542,3 +1547,40 @@ a task to set its own or one of its thread siblings comm value. The comm value | |||
1542 | is limited in size compared to the cmdline value, so writing anything longer | 1547 | is limited in size compared to the cmdline value, so writing anything longer |
1543 | than the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated | 1548 | than the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated |
1544 | comm value. | 1549 | comm value. |
1550 | |||
1551 | |||
1552 | ------------------------------------------------------------------------------ | ||
1553 | Configuring procfs | ||
1554 | ------------------------------------------------------------------------------ | ||
1555 | |||
1556 | 4.1 Mount options | ||
1557 | --------------------- | ||
1558 | |||
1559 | The following mount options are supported: | ||
1560 | |||
1561 | hidepid= Set /proc/<pid>/ access mode. | ||
1562 | gid= Set the group authorized to learn processes information. | ||
1563 | |||
1564 | hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories | ||
1565 | (default). | ||
1566 | |||
1567 | hidepid=1 means users may not access any /proc/<pid>/ directories but their | ||
1568 | own. Sensitive files like cmdline, sched*, status are now protected against | ||
1569 | other users. This makes it impossible to learn whether any user runs | ||
1570 | a specific program (given the program doesn't reveal itself by its behaviour). | ||
1571 | As an additional bonus, as /proc/<pid>/cmdline is inaccessible to other users, | ||
1572 | poorly written programs passing sensitive information via program arguments are | ||
1573 | now protected against local eavesdroppers. | ||
1574 | |||
1575 | hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other | ||
1576 | users. It doesn't mean that it hides a fact whether a process with a specific | ||
1577 | pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), | ||
1578 | but it hides process' uid and gid, which may be learned by stat()'ing | ||
1579 | /proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering | ||
1580 | information about running processes, whether some daemon runs with elevated | ||
1581 | privileges, whether other user runs some sensitive program, whether other users | ||
1582 | run any program at all, etc. | ||
1583 | |||
1584 | gid= defines a group authorized to learn processes information otherwise | ||
1585 | prohibited by hidepid=. If you use some daemon like identd which needs to learn | ||
1586 | information about processes information, just add identd to this group. | ||
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 07235caec22c..a6619b7064b9 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt | |||
@@ -70,7 +70,7 @@ An attribute definition is simply: | |||
70 | struct attribute { | 70 | struct attribute { |
71 | char * name; | 71 | char * name; |
72 | struct module *owner; | 72 | struct module *owner; |
73 | mode_t mode; | 73 | umode_t mode; |
74 | }; | 74 | }; |
75 | 75 | ||
76 | 76 | ||
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 43cbd0821721..3d9393b845b8 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -225,7 +225,7 @@ struct super_operations { | |||
225 | void (*clear_inode) (struct inode *); | 225 | void (*clear_inode) (struct inode *); |
226 | void (*umount_begin) (struct super_block *); | 226 | void (*umount_begin) (struct super_block *); |
227 | 227 | ||
228 | int (*show_options)(struct seq_file *, struct vfsmount *); | 228 | int (*show_options)(struct seq_file *, struct dentry *); |
229 | 229 | ||
230 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); | 230 | ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); |
231 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); | 231 | ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); |
@@ -341,14 +341,14 @@ This describes how the VFS can manipulate an inode in your | |||
341 | filesystem. As of kernel 2.6.22, the following members are defined: | 341 | filesystem. As of kernel 2.6.22, the following members are defined: |
342 | 342 | ||
343 | struct inode_operations { | 343 | struct inode_operations { |
344 | int (*create) (struct inode *,struct dentry *,int, struct nameidata *); | 344 | int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *); |
345 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); | 345 | struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); |
346 | int (*link) (struct dentry *,struct inode *,struct dentry *); | 346 | int (*link) (struct dentry *,struct inode *,struct dentry *); |
347 | int (*unlink) (struct inode *,struct dentry *); | 347 | int (*unlink) (struct inode *,struct dentry *); |
348 | int (*symlink) (struct inode *,struct dentry *,const char *); | 348 | int (*symlink) (struct inode *,struct dentry *,const char *); |
349 | int (*mkdir) (struct inode *,struct dentry *,int); | 349 | int (*mkdir) (struct inode *,struct dentry *,umode_t); |
350 | int (*rmdir) (struct inode *,struct dentry *); | 350 | int (*rmdir) (struct inode *,struct dentry *); |
351 | int (*mknod) (struct inode *,struct dentry *,int,dev_t); | 351 | int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); |
352 | int (*rename) (struct inode *, struct dentry *, | 352 | int (*rename) (struct inode *, struct dentry *, |
353 | struct inode *, struct dentry *); | 353 | struct inode *, struct dentry *); |
354 | int (*readlink) (struct dentry *, char __user *,int); | 354 | int (*readlink) (struct dentry *, char __user *,int); |
diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus index 15ac911ce51b..d28b591753d1 100644 --- a/Documentation/hwmon/pmbus +++ b/Documentation/hwmon/pmbus | |||
@@ -2,9 +2,8 @@ Kernel driver pmbus | |||
2 | ==================== | 2 | ==================== |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * Ericsson BMR45X series | 5 | * Ericsson BMR453, BMR454 |
6 | DC/DC Converter | 6 | Prefixes: 'bmr453', 'bmr454' |
7 | Prefixes: 'bmr450', 'bmr451', 'bmr453', 'bmr454' | ||
8 | Addresses scanned: - | 7 | Addresses scanned: - |
9 | Datasheet: | 8 | Datasheet: |
10 | http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395 | 9 | http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395 |
diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100 index 7617798b5c97..51f76a189fee 100644 --- a/Documentation/hwmon/zl6100 +++ b/Documentation/hwmon/zl6100 | |||
@@ -6,6 +6,10 @@ Supported chips: | |||
6 | Prefix: 'zl2004' | 6 | Prefix: 'zl2004' |
7 | Addresses scanned: - | 7 | Addresses scanned: - |
8 | Datasheet: http://www.intersil.com/data/fn/fn6847.pdf | 8 | Datasheet: http://www.intersil.com/data/fn/fn6847.pdf |
9 | * Intersil / Zilker Labs ZL2005 | ||
10 | Prefix: 'zl2005' | ||
11 | Addresses scanned: - | ||
12 | Datasheet: http://www.intersil.com/data/fn/fn6848.pdf | ||
9 | * Intersil / Zilker Labs ZL2006 | 13 | * Intersil / Zilker Labs ZL2006 |
10 | Prefix: 'zl2006' | 14 | Prefix: 'zl2006' |
11 | Addresses scanned: - | 15 | Addresses scanned: - |
@@ -30,6 +34,17 @@ Supported chips: | |||
30 | Prefix: 'zl6105' | 34 | Prefix: 'zl6105' |
31 | Addresses scanned: - | 35 | Addresses scanned: - |
32 | Datasheet: http://www.intersil.com/data/fn/fn6906.pdf | 36 | Datasheet: http://www.intersil.com/data/fn/fn6906.pdf |
37 | * Ericsson BMR450, BMR451 | ||
38 | Prefixes: 'bmr450', 'bmr451' | ||
39 | Addresses scanned: - | ||
40 | Datasheet: | ||
41 | http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401 | ||
42 | * Ericsson BMR462, BMR463, BMR464 | ||
43 | Prefixes: 'bmr462', 'bmr463', 'bmr464' | ||
44 | Addresses scanned: - | ||
45 | Datasheet: | ||
46 | http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256 | ||
47 | |||
33 | 48 | ||
34 | Author: Guenter Roeck <guenter.roeck@ericsson.com> | 49 | Author: Guenter Roeck <guenter.roeck@ericsson.com> |
35 | 50 | ||
diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt new file mode 100644 index 000000000000..f274c28b5103 --- /dev/null +++ b/Documentation/input/alps.txt | |||
@@ -0,0 +1,188 @@ | |||
1 | ALPS Touchpad Protocol | ||
2 | ---------------------- | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | Currently the ALPS touchpad driver supports four protocol versions in use by | ||
8 | ALPS touchpads, called versions 1, 2, 3, and 4. Information about the various | ||
9 | protocol versions is contained in the following sections. | ||
10 | |||
11 | Detection | ||
12 | --------- | ||
13 | |||
14 | All ALPS touchpads should respond to the "E6 report" command sequence: | ||
15 | E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or | ||
16 | 00-00-64. | ||
17 | |||
18 | If the E6 report is successful, the touchpad model is identified using the "E7 | ||
19 | report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is | ||
20 | matched against known models in the alps_model_data_array. | ||
21 | |||
22 | With protocol versions 3 and 4, the E7 report model signature is always | ||
23 | 73-02-64. To differentiate between these versions, the response from the | ||
24 | "Enter Command Mode" sequence must be inspected as described below. | ||
25 | |||
26 | Command Mode | ||
27 | ------------ | ||
28 | |||
29 | Protocol versions 3 and 4 have a command mode that is used to read and write | ||
30 | one-byte device registers in a 16-bit address space. The command sequence | ||
31 | EC-EC-EC-E9 places the device in command mode, and the device will respond | ||
32 | with 88-07 followed by a third byte. This third byte can be used to determine | ||
33 | whether the device uses the version 3 or 4 protocol. | ||
34 | |||
35 | To exit command mode, PSMOUSE_CMD_SETSTREAM (EA) is sent to the touchpad. | ||
36 | |||
37 | While in command mode, register addresses can be set by first sending a | ||
38 | specific command, either EC for v3 devices or F5 for v4 devices. Then the | ||
39 | address is sent one nibble at a time, where each nibble is encoded as a | ||
40 | command with optional data. This encoding differs slightly between the v3 and | ||
41 | v4 protocols. | ||
42 | |||
43 | Once an address has been set, the addressed register can be read by sending | ||
44 | PSMOUSE_CMD_GETINFO (E9). The first two bytes of the response contain the | ||
45 | address of the register being read, and the third contains the value of the | ||
46 | register. Registers are written by writing the value one nibble at a time | ||
47 | using the same encoding used for addresses. | ||
48 | |||
49 | Packet Format | ||
50 | ------------- | ||
51 | |||
52 | In the following tables, the following notation is used. | ||
53 | |||
54 | CAPITALS = stick, minuscules = touchpad | ||
55 | |||
56 | ?'s can have different meanings on different models, such as wheel rotation, | ||
57 | extra buttons, stick buttons on a dualpoint, etc. | ||
58 | |||
59 | PS/2 packet format | ||
60 | ------------------ | ||
61 | |||
62 | byte 0: 0 0 YSGN XSGN 1 M R L | ||
63 | byte 1: X7 X6 X5 X4 X3 X2 X1 X0 | ||
64 | byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 | ||
65 | |||
66 | Note that the device never signals overflow condition. | ||
67 | |||
68 | ALPS Absolute Mode - Protocol Version 1 | ||
69 | --------------------------------------- | ||
70 | |||
71 | byte 0: 1 0 0 0 1 x9 x8 x7 | ||
72 | byte 1: 0 x6 x5 x4 x3 x2 x1 x0 | ||
73 | byte 2: 0 ? ? l r ? fin ges | ||
74 | byte 3: 0 ? ? ? ? y9 y8 y7 | ||
75 | byte 4: 0 y6 y5 y4 y3 y2 y1 y0 | ||
76 | byte 5: 0 z6 z5 z4 z3 z2 z1 z0 | ||
77 | |||
78 | ALPS Absolute Mode - Protocol Version 2 | ||
79 | --------------------------------------- | ||
80 | |||
81 | byte 0: 1 ? ? ? 1 ? ? ? | ||
82 | byte 1: 0 x6 x5 x4 x3 x2 x1 x0 | ||
83 | byte 2: 0 x10 x9 x8 x7 ? fin ges | ||
84 | byte 3: 0 y9 y8 y7 1 M R L | ||
85 | byte 4: 0 y6 y5 y4 y3 y2 y1 y0 | ||
86 | byte 5: 0 z6 z5 z4 z3 z2 z1 z0 | ||
87 | |||
88 | Dualpoint device -- interleaved packet format | ||
89 | --------------------------------------------- | ||
90 | |||
91 | byte 0: 1 1 0 0 1 1 1 1 | ||
92 | byte 1: 0 x6 x5 x4 x3 x2 x1 x0 | ||
93 | byte 2: 0 x10 x9 x8 x7 0 fin ges | ||
94 | byte 3: 0 0 YSGN XSGN 1 1 1 1 | ||
95 | byte 4: X7 X6 X5 X4 X3 X2 X1 X0 | ||
96 | byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 | ||
97 | byte 6: 0 y9 y8 y7 1 m r l | ||
98 | byte 7: 0 y6 y5 y4 y3 y2 y1 y0 | ||
99 | byte 8: 0 z6 z5 z4 z3 z2 z1 z0 | ||
100 | |||
101 | ALPS Absolute Mode - Protocol Version 3 | ||
102 | --------------------------------------- | ||
103 | |||
104 | ALPS protocol version 3 has three different packet formats. The first two are | ||
105 | associated with touchpad events, and the third is associated with trackstick | ||
106 | events. | ||
107 | |||
108 | The first type is the touchpad position packet. | ||
109 | |||
110 | byte 0: 1 ? x1 x0 1 1 1 1 | ||
111 | byte 1: 0 x10 x9 x8 x7 x6 x5 x4 | ||
112 | byte 2: 0 y10 y9 y8 y7 y6 y5 y4 | ||
113 | byte 3: 0 M R L 1 m r l | ||
114 | byte 4: 0 mt x3 x2 y3 y2 y1 y0 | ||
115 | byte 5: 0 z6 z5 z4 z3 z2 z1 z0 | ||
116 | |||
117 | Note that for some devices the trackstick buttons are reported in this packet, | ||
118 | and on others they are reported in the trackstick packets. | ||
119 | |||
120 | The second packet type contains bitmaps representing the x and y axes. In the | ||
121 | bitmaps a given bit is set if there is a finger covering that position on the | ||
122 | given axis. Thus the bitmap packet can be used for low-resolution multi-touch | ||
123 | data, although finger tracking is not possible. This packet also encodes the | ||
124 | number of contacts (f1 and f0 in the table below). | ||
125 | |||
126 | byte 0: 1 1 x1 x0 1 1 1 1 | ||
127 | byte 1: 0 x8 x7 x6 x5 x4 x3 x2 | ||
128 | byte 2: 0 y7 y6 y5 y4 y3 y2 y1 | ||
129 | byte 3: 0 y10 y9 y8 1 1 1 1 | ||
130 | byte 4: 0 x14 x13 x12 x11 x10 x9 y0 | ||
131 | byte 5: 0 1 ? ? ? ? f1 f0 | ||
132 | |||
133 | This packet only appears after a position packet with the mt bit set, and | ||
134 | usually only appears when there are two or more contacts (although | ||
135 | occasionally it's seen with only a single contact). | ||
136 | |||
137 | The final v3 packet type is the trackstick packet. | ||
138 | |||
139 | byte 0: 1 1 x7 y7 1 1 1 1 | ||
140 | byte 1: 0 x6 x5 x4 x3 x2 x1 x0 | ||
141 | byte 2: 0 y6 y5 y4 y3 y2 y1 y0 | ||
142 | byte 3: 0 1 0 0 1 0 0 0 | ||
143 | byte 4: 0 z4 z3 z2 z1 z0 ? ? | ||
144 | byte 5: 0 0 1 1 1 1 1 1 | ||
145 | |||
146 | ALPS Absolute Mode - Protocol Version 4 | ||
147 | --------------------------------------- | ||
148 | |||
149 | Protocol version 4 has an 8-byte packet format. | ||
150 | |||
151 | byte 0: 1 ? x1 x0 1 1 1 1 | ||
152 | byte 1: 0 x10 x9 x8 x7 x6 x5 x4 | ||
153 | byte 2: 0 y10 y9 y8 y7 y6 y5 y4 | ||
154 | byte 3: 0 1 x3 x2 y3 y2 y1 y0 | ||
155 | byte 4: 0 ? ? ? 1 ? r l | ||
156 | byte 5: 0 z6 z5 z4 z3 z2 z1 z0 | ||
157 | byte 6: bitmap data (described below) | ||
158 | byte 7: bitmap data (described below) | ||
159 | |||
160 | The last two bytes represent a partial bitmap packet, with 3 full packets | ||
161 | required to construct a complete bitmap packet. Once assembled, the 6-byte | ||
162 | bitmap packet has the following format: | ||
163 | |||
164 | byte 0: 0 1 x7 x6 x5 x4 x3 x2 | ||
165 | byte 1: 0 x1 x0 y4 y3 y2 y1 y0 | ||
166 | byte 2: 0 0 ? x14 x13 x12 x11 x10 | ||
167 | byte 3: 0 x9 x8 y9 y8 y7 y6 y5 | ||
168 | byte 4: 0 0 0 0 0 0 0 0 | ||
169 | byte 5: 0 0 0 0 0 0 0 y10 | ||
170 | |||
171 | There are several things worth noting here. | ||
172 | |||
173 | 1) In the bitmap data, bit 6 of byte 0 serves as a sync bit to | ||
174 | identify the first fragment of a bitmap packet. | ||
175 | |||
176 | 2) The bitmaps represent the same data as in the v3 bitmap packets, although | ||
177 | the packet layout is different. | ||
178 | |||
179 | 3) There doesn't seem to be a count of the contact points anywhere in the v4 | ||
180 | protocol packets. Deriving a count of contact points must be done by | ||
181 | analyzing the bitmaps. | ||
182 | |||
183 | 4) There is a 3 to 1 ratio of position packets to bitmap packets. Therefore | ||
184 | MT position can only be updated for every third ST position update, and | ||
185 | the count of contact points can only be updated every third packet as | ||
186 | well. | ||
187 | |||
188 | So far no v4 devices with tracksticks have been encountered. | ||
diff --git a/Documentation/input/gpio-tilt.txt b/Documentation/input/gpio-tilt.txt new file mode 100644 index 000000000000..06d60c3ff5e7 --- /dev/null +++ b/Documentation/input/gpio-tilt.txt | |||
@@ -0,0 +1,103 @@ | |||
1 | Driver for tilt-switches connected via GPIOs | ||
2 | ============================================ | ||
3 | |||
4 | Generic driver to read data from tilt switches connected via gpios. | ||
5 | Orientation can be provided by one or more tilt switches, | ||
6 | i.e. each tilt switch providing one axis, and the number of axes | ||
7 | is also not limited. | ||
8 | |||
9 | |||
10 | Data structures: | ||
11 | ---------------- | ||
12 | |||
13 | The array of struct gpio in the gpios field is used to list the gpios | ||
14 | that represent the current tilt state. | ||
15 | |||
16 | The array of struct gpio_tilt_axis describes the axes that are reported | ||
17 | to the input system. The values set therein are used for the | ||
18 | input_set_abs_params calls needed to init the axes. | ||
19 | |||
20 | The array of struct gpio_tilt_state maps gpio states to the corresponding | ||
21 | values to report. The gpio state is represented as a bitfield where the | ||
22 | bit-index corresponds to the index of the gpio in the struct gpio array. | ||
23 | In the same manner the values stored in the axes array correspond to | ||
24 | the elements of the gpio_tilt_axis-array. | ||
25 | |||
26 | |||
27 | Example: | ||
28 | -------- | ||
29 | |||
30 | Example configuration for a single TS1003 tilt switch that rotates around | ||
31 | one axis in 4 steps and emits the current tilt via two GPIOs. | ||
32 | |||
33 | static int sg060_tilt_enable(struct device *dev) { | ||
34 | /* code to enable the sensors */ | ||
35 | }; | ||
36 | |||
37 | static void sg060_tilt_disable(struct device *dev) { | ||
38 | /* code to disable the sensors */ | ||
39 | }; | ||
40 | |||
41 | static struct gpio sg060_tilt_gpios[] = { | ||
42 | { SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" }, | ||
43 | { SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" }, | ||
44 | }; | ||
45 | |||
46 | static struct gpio_tilt_state sg060_tilt_states[] = { | ||
47 | { | ||
48 | .gpios = (0 << 1) | (0 << 0), | ||
49 | .axes = (int[]) { | ||
50 | 0, | ||
51 | }, | ||
52 | }, { | ||
53 | .gpios = (0 << 1) | (1 << 0), | ||
54 | .axes = (int[]) { | ||
55 | 1, /* 90 degrees */ | ||
56 | }, | ||
57 | }, { | ||
58 | .gpios = (1 << 1) | (1 << 0), | ||
59 | .axes = (int[]) { | ||
60 | 2, /* 180 degrees */ | ||
61 | }, | ||
62 | }, { | ||
63 | .gpios = (1 << 1) | (0 << 0), | ||
64 | .axes = (int[]) { | ||
65 | 3, /* 270 degrees */ | ||
66 | }, | ||
67 | }, | ||
68 | }; | ||
69 | |||
70 | static struct gpio_tilt_axis sg060_tilt_axes[] = { | ||
71 | { | ||
72 | .axis = ABS_RY, | ||
73 | .min = 0, | ||
74 | .max = 3, | ||
75 | .fuzz = 0, | ||
76 | .flat = 0, | ||
77 | }, | ||
78 | }; | ||
79 | |||
80 | static struct gpio_tilt_platform_data sg060_tilt_pdata= { | ||
81 | .gpios = sg060_tilt_gpios, | ||
82 | .nr_gpios = ARRAY_SIZE(sg060_tilt_gpios), | ||
83 | |||
84 | .axes = sg060_tilt_axes, | ||
85 | .nr_axes = ARRAY_SIZE(sg060_tilt_axes), | ||
86 | |||
87 | .states = sg060_tilt_states, | ||
88 | .nr_states = ARRAY_SIZE(sg060_tilt_states), | ||
89 | |||
90 | .debounce_interval = 100, | ||
91 | |||
92 | .poll_interval = 1000, | ||
93 | .enable = sg060_tilt_enable, | ||
94 | .disable = sg060_tilt_disable, | ||
95 | }; | ||
96 | |||
97 | static struct platform_device sg060_device_tilt = { | ||
98 | .name = "gpio-tilt-polled", | ||
99 | .id = -1, | ||
100 | .dev = { | ||
101 | .platform_data = &sg060_tilt_pdata, | ||
102 | }, | ||
103 | }; | ||
diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt index b2ef125b71f8..89251e2a3eba 100644 --- a/Documentation/input/sentelic.txt +++ b/Documentation/input/sentelic.txt | |||
@@ -1,5 +1,5 @@ | |||
1 | Copyright (C) 2002-2010 Sentelic Corporation. | 1 | Copyright (C) 2002-2011 Sentelic Corporation. |
2 | Last update: Jan-13-2010 | 2 | Last update: Dec-07-2011 |
3 | 3 | ||
4 | ============================================================================== | 4 | ============================================================================== |
5 | * Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) | 5 | * Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) |
@@ -140,6 +140,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
140 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 140 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
141 | => 01, Absolute coordination packet | 141 | => 01, Absolute coordination packet |
142 | => 10, Notify packet | 142 | => 10, Notify packet |
143 | => 11, Normal data packet with on-pad click | ||
143 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. | 144 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. |
144 | When both fingers are up, the last two reports have zero valid | 145 | When both fingers are up, the last two reports have zero valid |
145 | bit. | 146 | bit. |
@@ -164,6 +165,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
164 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 165 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
165 | => 01, Absolute coordinates packet | 166 | => 01, Absolute coordinates packet |
166 | => 10, Notify packet | 167 | => 10, Notify packet |
168 | => 11, Normal data packet with on-pad click | ||
167 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. | 169 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. |
168 | When both fingers are up, the last two reports have zero valid | 170 | When both fingers are up, the last two reports have zero valid |
169 | bit. | 171 | bit. |
@@ -188,6 +190,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
188 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 190 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
189 | => 01, Absolute coordinates packet | 191 | => 01, Absolute coordinates packet |
190 | => 10, Notify packet | 192 | => 10, Notify packet |
193 | => 11, Normal data packet with on-pad click | ||
191 | Bit5 => 1 | 194 | Bit5 => 1 |
192 | Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): | 195 | Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): |
193 | 0: left button is generated by the on-pad command | 196 | 0: left button is generated by the on-pad command |
@@ -205,7 +208,7 @@ Byte 4: Bit7 => scroll right button | |||
205 | Bit6 => scroll left button | 208 | Bit6 => scroll left button |
206 | Bit5 => scroll down button | 209 | Bit5 => scroll down button |
207 | Bit4 => scroll up button | 210 | Bit4 => scroll up button |
208 | * Note that if gesture and additional buttoni (Bit4~Bit7) | 211 | * Note that if gesture and additional button (Bit4~Bit7) |
209 | happen at the same time, the button information will not | 212 | happen at the same time, the button information will not |
210 | be sent. | 213 | be sent. |
211 | Bit3~Bit0 => Reserved | 214 | Bit3~Bit0 => Reserved |
@@ -227,6 +230,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
227 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 230 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
228 | => 01, Absolute coordinates packet | 231 | => 01, Absolute coordinates packet |
229 | => 10, Notify packet | 232 | => 10, Notify packet |
233 | => 11, Normal data packet with on-pad click | ||
230 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. | 234 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. |
231 | When both fingers are up, the last two reports have zero valid | 235 | When both fingers are up, the last two reports have zero valid |
232 | bit. | 236 | bit. |
@@ -253,6 +257,7 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
253 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 257 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
254 | => 01, Absolute coordination packet | 258 | => 01, Absolute coordination packet |
255 | => 10, Notify packet | 259 | => 10, Notify packet |
260 | => 11, Normal data packet with on-pad click | ||
256 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. | 261 | Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. |
257 | When both fingers are up, the last two reports have zero valid | 262 | When both fingers are up, the last two reports have zero valid |
258 | bit. | 263 | bit. |
@@ -279,8 +284,9 @@ BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|--------- | |||
279 | Byte 1: Bit7~Bit6 => 00, Normal data packet | 284 | Byte 1: Bit7~Bit6 => 00, Normal data packet |
280 | => 01, Absolute coordination packet | 285 | => 01, Absolute coordination packet |
281 | => 10, Notify packet | 286 | => 10, Notify packet |
287 | => 11, Normal data packet with on-pad click | ||
282 | Bit5 => 1 | 288 | Bit5 => 1 |
283 | Bit4 => when in absolute coordinate mode (valid when EN_PKT_GO is 1): | 289 | Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): |
284 | 0: left button is generated by the on-pad command | 290 | 0: left button is generated by the on-pad command |
285 | 1: left button is generated by the external button | 291 | 1: left button is generated by the external button |
286 | Bit3 => 1 | 292 | Bit3 => 1 |
@@ -307,6 +313,110 @@ Sample sequence of Multi-finger, Multi-coordinate mode: | |||
307 | abs pkt 2, ..., notify packet (valid bit == 0) | 313 | abs pkt 2, ..., notify packet (valid bit == 0) |
308 | 314 | ||
309 | ============================================================================== | 315 | ============================================================================== |
316 | * Absolute position for STL3888-Cx and STL3888-Dx. | ||
317 | ============================================================================== | ||
318 | Single Finger, Absolute Coordinate Mode (SFAC) | ||
319 | Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 | ||
320 | BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| | ||
321 | 1 |0|1|0|P|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| | ||
322 | |---------------| |---------------| |---------------| |---------------| | ||
323 | |||
324 | Byte 1: Bit7~Bit6 => 00, Normal data packet | ||
325 | => 01, Absolute coordinates packet | ||
326 | => 10, Notify packet | ||
327 | Bit5 => Coordinate mode(always 0 in SFAC mode): | ||
328 | 0: single-finger absolute coordinates (SFAC) mode | ||
329 | 1: multi-finger, multiple coordinates (MFMC) mode | ||
330 | Bit4 => 0: The LEFT button is generated by on-pad command (OPC) | ||
331 | 1: The LEFT button is generated by external button | ||
332 | Default is 1 even if the LEFT button is not pressed. | ||
333 | Bit3 => Always 1, as specified by PS/2 protocol. | ||
334 | Bit2 => Middle Button, 1 is pressed, 0 is not pressed. | ||
335 | Bit1 => Right Button, 1 is pressed, 0 is not pressed. | ||
336 | Bit0 => Left Button, 1 is pressed, 0 is not pressed. | ||
337 | Byte 2: X coordinate (xpos[9:2]) | ||
338 | Byte 3: Y coordinate (ypos[9:2]) | ||
339 | Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0]) | ||
340 | Bit3~Bit2 => X coordinate (xpos[1:0]) | ||
341 | Bit4 => 4th mouse button(forward one page) | ||
342 | Bit5 => 5th mouse button(backward one page) | ||
343 | Bit6 => scroll left button | ||
344 | Bit7 => scroll right button | ||
345 | |||
346 | Multi Finger, Multiple Coordinates Mode (MFMC): | ||
347 | Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 | ||
348 | BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| | ||
349 | 1 |0|1|1|P|1|F|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| | ||
350 | |---------------| |---------------| |---------------| |---------------| | ||
351 | |||
352 | Byte 1: Bit7~Bit6 => 00, Normal data packet | ||
353 | => 01, Absolute coordinates packet | ||
354 | => 10, Notify packet | ||
355 | Bit5 => Coordinate mode (always 1 in MFMC mode): | ||
356 | 0: single-finger absolute coordinates (SFAC) mode | ||
357 | 1: multi-finger, multiple coordinates (MFMC) mode | ||
358 | Bit4 => 0: The LEFT button is generated by on-pad command (OPC) | ||
359 | 1: The LEFT button is generated by external button | ||
360 | Default is 1 even if the LEFT button is not pressed. | ||
361 | Bit3 => Always 1, as specified by PS/2 protocol. | ||
362 | Bit2 => Finger index, 0 is the first finger, 1 is the second finger. | ||
363 | If bits 1 and 0 are both 1 and bit 4 is 0, the middle external | ||
364 | button is pressed. | ||
365 | Bit1 => Right Button, 1 is pressed, 0 is not pressed. | ||
366 | Bit0 => Left Button, 1 is pressed, 0 is not pressed. | ||
367 | Byte 2: X coordinate (xpos[9:2]) | ||
368 | Byte 3: Y coordinate (ypos[9:2]) | ||
369 | Byte 4: Bit1~Bit0 => Y coordinate (ypos[1:0]) | ||
370 | Bit3~Bit2 => X coordinate (xpos[1:0]) | ||
371 | Bit4 => 4th mouse button(forward one page) | ||
372 | Bit5 => 5th mouse button(backward one page) | ||
373 | Bit6 => scroll left button | ||
374 | Bit7 => scroll right button | ||
375 | |||
376 | When one of the two fingers is up, the device will output four consecutive | ||
377 | MFMC#0 report packets with zero X and Y to represent 1st finger is up or | ||
378 | four consecutive MFMC#1 report packets with zero X and Y to represent that | ||
379 | the 2nd finger is up. On the other hand, if both fingers are up, the device | ||
380 | will output four consecutive single-finger, absolute coordinate(SFAC) packets | ||
381 | with zero X and Y. | ||
382 | |||
383 | Notify Packet for STL3888-Cx/Dx | ||
384 | Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 | ||
385 | BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| | ||
386 | 1 |1|0|0|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0| | ||
387 | |---------------| |---------------| |---------------| |---------------| | ||
388 | |||
389 | Byte 1: Bit7~Bit6 => 00, Normal data packet | ||
390 | => 01, Absolute coordinates packet | ||
391 | => 10, Notify packet | ||
392 | Bit5 => Always 0 | ||
393 | Bit4 => 0: The LEFT button is generated by on-pad command(OPC) | ||
394 | 1: The LEFT button is generated by external button | ||
395 | Default is 1 even if the LEFT button is not pressed. | ||
396 | Bit3 => 1 | ||
397 | Bit2 => Middle Button, 1 is pressed, 0 is not pressed. | ||
398 | Bit1 => Right Button, 1 is pressed, 0 is not pressed. | ||
399 | Bit0 => Left Button, 1 is pressed, 0 is not pressed. | ||
400 | Byte 2: Message type: | ||
401 | 0xba => gesture information | ||
402 | 0xc0 => one finger hold-rotating gesture | ||
403 | Byte 3: The first parameter for the received message: | ||
404 | 0xba => gesture ID (refer to the 'Gesture ID' section) | ||
405 | 0xc0 => region ID | ||
406 | Byte 4: The second parameter for the received message: | ||
407 | 0xba => N/A | ||
408 | 0xc0 => finger up/down information | ||
409 | |||
410 | Sample sequence of Multi-finger, Multi-coordinates mode: | ||
411 | |||
412 | notify packet (valid bit == 1), MFMC packet 1 (byte 1, bit 2 == 0), | ||
413 | MFMC packet 2 (byte 1, bit 2 == 1), MFMC packet 1, MFMC packet 2, | ||
414 | ..., notify packet (valid bit == 0) | ||
415 | |||
416 | That is, when the device is in MFMC mode, the host will receive | ||
417 | interleaved absolute coordinate packets for each finger. | ||
418 | |||
419 | ============================================================================== | ||
310 | * FSP Enable/Disable packet | 420 | * FSP Enable/Disable packet |
311 | ============================================================================== | 421 | ============================================================================== |
312 | Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 | 422 | Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 |
@@ -348,9 +458,10 @@ http://www.computer-engineering.org/ps2mouse/ | |||
348 | ============================================================================== | 458 | ============================================================================== |
349 | 1. Identify FSP by reading device ID(0x00) and version(0x01) register | 459 | 1. Identify FSP by reading device ID(0x00) and version(0x01) register |
350 | 460 | ||
351 | 2. Determine number of buttons by reading status2 (0x0b) register | 461 | 2a. For FSP version < STL3888 Cx, determine number of buttons by reading |
462 | the 'test mode status' (0x20) register: | ||
352 | 463 | ||
353 | buttons = reg[0x0b] & 0x30 | 464 | buttons = reg[0x20] & 0x30 |
354 | 465 | ||
355 | if buttons == 0x30 or buttons == 0x20: | 466 | if buttons == 0x30 or buttons == 0x20: |
356 | # two/four buttons | 467 | # two/four buttons |
@@ -365,6 +476,10 @@ http://www.computer-engineering.org/ps2mouse/ | |||
365 | Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' | 476 | Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' |
366 | section A for packet parsing detail | 477 | section A for packet parsing detail |
367 | 478 | ||
479 | 2b. For FSP version >= STL3888 Cx: | ||
480 | Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' | ||
481 | section A for packet parsing detail (ignore byte 4, Bit4~Bit7) | ||
482 | |||
368 | ============================================================================== | 483 | ============================================================================== |
369 | * Programming Sequence for Register Reading/Writing | 484 | * Programming Sequence for Register Reading/Writing |
370 | ============================================================================== | 485 | ============================================================================== |
@@ -374,7 +489,7 @@ Register inversion requirement: | |||
374 | Following values needed to be inverted(the '~' operator in C) before being | 489 | Following values needed to be inverted(the '~' operator in C) before being |
375 | sent to FSP: | 490 | sent to FSP: |
376 | 491 | ||
377 | 0xe9, 0xee, 0xf2 and 0xff. | 492 | 0xe8, 0xe9, 0xee, 0xf2, 0xf3 and 0xff. |
378 | 493 | ||
379 | Register swapping requirement: | 494 | Register swapping requirement: |
380 | 495 | ||
@@ -415,7 +530,18 @@ Register reading sequence: | |||
415 | 530 | ||
416 | 8. send 0xe9(status request) PS/2 command to FSP; | 531 | 8. send 0xe9(status request) PS/2 command to FSP; |
417 | 532 | ||
418 | 9. the response read from FSP should be the requested register value. | 533 | 9. the 4th byte of the response read from FSP should be the |
534 | requested register value(?? indicates don't care byte): | ||
535 | |||
536 | host: 0xe9 | ||
537 | 3888: 0xfa (??) (??) (val) | ||
538 | |||
539 | * Note that since the Cx release, the hardware will return 1's | ||
540 | complement of the register value at the 3rd byte of status request | ||
541 | result: | ||
542 | |||
543 | host: 0xe9 | ||
544 | 3888: 0xfa (??) (~val) (val) | ||
419 | 545 | ||
420 | Register writing sequence: | 546 | Register writing sequence: |
421 | 547 | ||
@@ -465,71 +591,194 @@ Register writing sequence: | |||
465 | 591 | ||
466 | 9. the register writing sequence is completed. | 592 | 9. the register writing sequence is completed. |
467 | 593 | ||
594 | * Note that since the Cx release, the hardware will return 1's | ||
595 | complement of the register value at the 3rd byte of status request | ||
596 | result. Host can optionally send another 0xe9 (status request) PS/2 | ||
597 | command to FSP at the end of register writing to verify that the | ||
598 | register writing operation is successful (?? indicates don't care | ||
599 | byte): | ||
600 | |||
601 | host: 0xe9 | ||
602 | 3888: 0xfa (??) (~val) (val) | ||
603 | |||
604 | ============================================================================== | ||
605 | * Programming Sequence for Page Register Reading/Writing | ||
606 | ============================================================================== | ||
607 | |||
608 | In order to overcome the limitation of maximum number of registers | ||
609 | supported, the hardware separates registers into different groups called | ||
610 | 'pages.' Each page is able to include up to 255 registers. | ||
611 | |||
612 | The default page after power up is 0x82; therefore, if one has to get | ||
613 | access to register 0x8301, one has to use following sequence to switch | ||
614 | to page 0x83, then start reading/writing from/to offset 0x01 by using | ||
615 | the register read/write sequence described in previous section. | ||
616 | |||
617 | Page register reading sequence: | ||
618 | |||
619 | 1. send 0xf3 PS/2 command to FSP; | ||
620 | |||
621 | 2. send 0x66 PS/2 command to FSP; | ||
622 | |||
623 | 3. send 0x88 PS/2 command to FSP; | ||
624 | |||
625 | 4. send 0xf3 PS/2 command to FSP; | ||
626 | |||
627 | 5. send 0x83 PS/2 command to FSP; | ||
628 | |||
629 | 6. send 0x88 PS/2 command to FSP; | ||
630 | |||
631 | 7. send 0xe9(status request) PS/2 command to FSP; | ||
632 | |||
633 | 8. the response read from FSP should be the requested page value. | ||
634 | |||
635 | Page register writing sequence: | ||
636 | |||
637 | 1. send 0xf3 PS/2 command to FSP; | ||
638 | |||
639 | 2. send 0x38 PS/2 command to FSP; | ||
640 | |||
641 | 3. send 0x88 PS/2 command to FSP; | ||
642 | |||
643 | 4. send 0xf3 PS/2 command to FSP; | ||
644 | |||
645 | 5. if the page address being written is not required to be | ||
646 | inverted(refer to the 'Register inversion requirement' section), | ||
647 | goto step 6 | ||
648 | |||
649 | 5a. send 0x47 PS/2 command to FSP; | ||
650 | |||
651 | 5b. send the inverted page address to FSP and goto step 9; | ||
652 | |||
653 | 6. if the page address being written is not required to be | ||
654 | swapped(refer to the 'Register swapping requirement' section), | ||
655 | goto step 7 | ||
656 | |||
657 | 6a. send 0x44 PS/2 command to FSP; | ||
658 | |||
659 | 6b. send the swapped page address to FSP and goto step 9; | ||
660 | |||
661 | 7. send 0x33 PS/2 command to FSP; | ||
662 | |||
663 | 8. send the page address to FSP; | ||
664 | |||
665 | 9. the page register writing sequence is completed. | ||
666 | |||
667 | ============================================================================== | ||
668 | * Gesture ID | ||
669 | ============================================================================== | ||
670 | |||
671 | Unlike other devices which send multiple fingers' coordinates to host, | ||
672 | FSP processes multiple fingers' coordinates internally and converts them | ||
673 | into an 8-bit integer, namely 'Gesture ID.' Following is a list of | ||
674 | supported gesture IDs: | ||
675 | |||
676 | ID Description | ||
677 | 0x86 2 finger straight up | ||
678 | 0x82 2 finger straight down | ||
679 | 0x80 2 finger straight right | ||
680 | 0x84 2 finger straight left | ||
681 | 0x8f 2 finger zoom in | ||
682 | 0x8b 2 finger zoom out | ||
683 | 0xc0 2 finger curve, counter clockwise | ||
684 | 0xc4 2 finger curve, clockwise | ||
685 | 0x2e 3 finger straight up | ||
686 | 0x2a 3 finger straight down | ||
687 | 0x28 3 finger straight right | ||
688 | 0x2c 3 finger straight left | ||
689 | 0x38 palm | ||
690 | |||
468 | ============================================================================== | 691 | ============================================================================== |
469 | * Register Listing | 692 | * Register Listing |
470 | ============================================================================== | 693 | ============================================================================== |
471 | 694 | ||
695 | Registers are represented as 16-bit values. The higher 8 bits represent | ||
696 | the page address and the lower 8 bits represent the relative offset within | ||
697 | that particular page. Refer to the 'Programming Sequence for Page Register | ||
698 | Reading/Writing' section for instructions on how to change the current page | ||
699 | address. | ||
700 | |||
472 | offset width default r/w name | 701 | offset width default r/w name |
473 | 0x00 bit7~bit0 0x01 RO device ID | 702 | 0x8200 bit7~bit0 0x01 RO device ID |
474 | 703 | ||
475 | 0x01 bit7~bit0 0xc0 RW version ID | 704 | 0x8201 bit7~bit0 RW version ID |
705 | 0xc1: STL3888 Ax | ||
706 | 0xd0 ~ 0xd2: STL3888 Bx | ||
707 | 0xe0 ~ 0xe1: STL3888 Cx | ||
708 | 0xe2 ~ 0xe3: STL3888 Dx | ||
476 | 709 | ||
477 | 0x02 bit7~bit0 0x01 RO vendor ID | 710 | 0x8202 bit7~bit0 0x01 RO vendor ID |
478 | 711 | ||
479 | 0x03 bit7~bit0 0x01 RO product ID | 712 | 0x8203 bit7~bit0 0x01 RO product ID |
480 | 713 | ||
481 | 0x04 bit3~bit0 0x01 RW revision ID | 714 | 0x8204 bit3~bit0 0x01 RW revision ID |
482 | 715 | ||
483 | 0x0b RO test mode status 1 | 716 | 0x820b test mode status 1 |
484 | bit3 1 RO 0: rotate 180 degree, 1: no rotation | 717 | bit3 1 RO 0: rotate 180 degree |
718 | 1: no rotation | ||
719 | *only supported by H/W prior to Cx | ||
485 | 720 | ||
486 | bit5~bit4 RO number of buttons | 721 | 0x820f register file page control |
487 | 11 => 2, lbtn/rbtn | 722 | bit2 0 RW 1: rotate 180 degree |
488 | 10 => 4, lbtn/rbtn/scru/scrd | 723 | 0: no rotation |
489 | 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr | 724 | *supported since Cx |
490 | 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn | ||
491 | 725 | ||
492 | 0x0f RW register file page control | ||
493 | bit0 0 RW 1 to enable page 1 register files | 726 | bit0 0 RW 1 to enable page 1 register files |
727 | *only supported by H/W prior to Cx | ||
494 | 728 | ||
495 | 0x10 RW system control 1 | 729 | 0x8210 RW system control 1 |
496 | bit0 1 RW Reserved, must be 1 | 730 | bit0 1 RW Reserved, must be 1 |
497 | bit1 0 RW Reserved, must be 0 | 731 | bit1 0 RW Reserved, must be 0 |
498 | bit4 1 RW Reserved, must be 0 | 732 | bit4 0 RW Reserved, must be 0 |
499 | bit5 0 RW register clock gating enable | 733 | bit5 1 RW register clock gating enable |
500 | 0: read only, 1: read/write enable | 734 | 0: read only, 1: read/write enable |
501 | (Note that following registers does not require clock gating being | 735 | (Note that following registers does not require clock gating being |
502 | enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e | 736 | enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e |
503 | 40 41 42 43. In addition to that, this bit must be 1 when gesture | 737 | 40 41 42 43. In addition to that, this bit must be 1 when gesture |
504 | mode is enabled) | 738 | mode is enabled) |
505 | 739 | ||
506 | 0x31 RW on-pad command detection | 740 | 0x8220 test mode status |
741 | bit5~bit4 RO number of buttons | ||
742 | 11 => 2, lbtn/rbtn | ||
743 | 10 => 4, lbtn/rbtn/scru/scrd | ||
744 | 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr | ||
745 | 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn | ||
746 | *only supported by H/W prior to Cx | ||
747 | |||
748 | 0x8231 RW on-pad command detection | ||
507 | bit7 0 RW on-pad command left button down tag | 749 | bit7 0 RW on-pad command left button down tag |
508 | enable | 750 | enable |
509 | 0: disable, 1: enable | 751 | 0: disable, 1: enable |
752 | *only supported by H/W prior to Cx | ||
510 | 753 | ||
511 | 0x34 RW on-pad command control 5 | 754 | 0x8234 RW on-pad command control 5 |
512 | bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 | 755 | bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 |
513 | (Note that position unit is in 0.5 scanline) | 756 | (Note that position unit is in 0.5 scanline) |
757 | *only supported by H/W prior to Cx | ||
514 | 758 | ||
515 | bit7 0 RW on-pad tap zone enable | 759 | bit7 0 RW on-pad tap zone enable |
516 | 0: disable, 1: enable | 760 | 0: disable, 1: enable |
761 | *only supported by H/W prior to Cx | ||
517 | 762 | ||
518 | 0x35 RW on-pad command control 6 | 763 | 0x8235 RW on-pad command control 6 |
519 | bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 | 764 | bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 |
520 | (Note that position unit is in 0.5 scanline) | 765 | (Note that position unit is in 0.5 scanline) |
766 | *only supported by H/W prior to Cx | ||
521 | 767 | ||
522 | 0x36 RW on-pad command control 7 | 768 | 0x8236 RW on-pad command control 7 |
523 | bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 | 769 | bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 |
524 | (Note that position unit is in 0.5 scanline) | 770 | (Note that position unit is in 0.5 scanline) |
771 | *only supported by H/W prior to Cx | ||
525 | 772 | ||
526 | 0x37 RW on-pad command control 8 | 773 | 0x8237 RW on-pad command control 8 |
527 | bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 | 774 | bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 |
528 | (Note that position unit is in 0.5 scanline) | 775 | (Note that position unit is in 0.5 scanline) |
776 | *only supported by H/W prior to Cx | ||
529 | 777 | ||
530 | 0x40 RW system control 5 | 778 | 0x8240 RW system control 5 |
531 | bit1 0 RW FSP Intellimouse mode enable | 779 | bit1 0 RW FSP Intellimouse mode enable |
532 | 0: disable, 1: enable | 780 | 0: disable, 1: enable |
781 | *only supported by H/W prior to Cx | ||
533 | 782 | ||
534 | bit2 0 RW movement + abs. coordinate mode enable | 783 | bit2 0 RW movement + abs. coordinate mode enable |
535 | 0: disable, 1: enable | 784 | 0: disable, 1: enable |
@@ -537,6 +786,7 @@ offset width default r/w name | |||
537 | bit 1 is not set. However, the format is different from that of bit 1. | 786 | bit 1 is not set. However, the format is different from that of bit 1. |
538 | In addition, when bit 1 and bit 2 are set at the same time, bit 2 will | 787 | In addition, when bit 1 and bit 2 are set at the same time, bit 2 will |
539 | override bit 1.) | 788 | override bit 1.) |
789 | *only supported by H/W prior to Cx | ||
540 | 790 | ||
541 | bit3 0 RW abs. coordinate only mode enable | 791 | bit3 0 RW abs. coordinate only mode enable |
542 | 0: disable, 1: enable | 792 | 0: disable, 1: enable |
@@ -544,9 +794,11 @@ offset width default r/w name | |||
544 | bit 1 is not set. However, the format is different from that of bit 1. | 794 | bit 1 is not set. However, the format is different from that of bit 1. |
545 | In addition, when bit 1, bit 2 and bit 3 are set at the same time, | 795 | In addition, when bit 1, bit 2 and bit 3 are set at the same time, |
546 | bit 3 will override bit 1 and 2.) | 796 | bit 3 will override bit 1 and 2.) |
797 | *only supported by H/W prior to Cx | ||
547 | 798 | ||
548 | bit5 0 RW auto switch enable | 799 | bit5 0 RW auto switch enable |
549 | 0: disable, 1: enable | 800 | 0: disable, 1: enable |
801 | *only supported by H/W prior to Cx | ||
550 | 802 | ||
551 | bit6 0 RW G0 abs. + notify packet format enable | 803 | bit6 0 RW G0 abs. + notify packet format enable |
552 | 0: disable, 1: enable | 804 | 0: disable, 1: enable |
@@ -554,18 +806,68 @@ offset width default r/w name | |||
554 | bit 2 and 3. That is, if any of those bit is 1, host will receive | 806 | bit 2 and 3. That is, if any of those bit is 1, host will receive |
555 | absolute coordinates; otherwise, host only receives packets with | 807 | absolute coordinates; otherwise, host only receives packets with |
556 | relative coordinate.) | 808 | relative coordinate.) |
809 | *only supported by H/W prior to Cx | ||
557 | 810 | ||
558 | bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd | 811 | bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd |
559 | finger packet enable | 812 | finger packet enable |
560 | 0: disable, 1: enable | 813 | 0: disable, 1: enable |
814 | *only supported by H/W prior to Cx | ||
561 | 815 | ||
562 | 0x43 RW on-pad control | 816 | 0x8243 RW on-pad control |
563 | bit0 0 RW on-pad control enable | 817 | bit0 0 RW on-pad control enable |
564 | 0: disable, 1: enable | 818 | 0: disable, 1: enable |
565 | (Note that if this bit is cleared, bit 3/5 will be ineffective) | 819 | (Note that if this bit is cleared, bit 3/5 will be ineffective) |
820 | *only supported by H/W prior to Cx | ||
566 | 821 | ||
567 | bit3 0 RW on-pad fix vertical scrolling enable | 822 | bit3 0 RW on-pad fix vertical scrolling enable |
568 | 0: disable, 1: enable | 823 | 0: disable, 1: enable |
824 | *only supported by H/W prior to Cx | ||
569 | 825 | ||
570 | bit5 0 RW on-pad fix horizontal scrolling enable | 826 | bit5 0 RW on-pad fix horizontal scrolling enable |
571 | 0: disable, 1: enable | 827 | 0: disable, 1: enable |
828 | *only supported by H/W prior to Cx | ||
829 | |||
830 | 0x8290 RW software control register 1 | ||
831 | bit0 0 RW absolute coordination mode | ||
832 | 0: disable, 1: enable | ||
833 | *supported since Cx | ||
834 | |||
835 | bit1 0 RW gesture ID output | ||
836 | 0: disable, 1: enable | ||
837 | *supported since Cx | ||
838 | |||
839 | bit2 0 RW two fingers' coordinates output | ||
840 | 0: disable, 1: enable | ||
841 | *supported since Cx | ||
842 | |||
843 | bit3 0 RW finger up one packet output | ||
844 | 0: disable, 1: enable | ||
845 | *supported since Cx | ||
846 | |||
847 | bit4 0 RW absolute coordination continuous mode | ||
848 | 0: disable, 1: enable | ||
849 | *supported since Cx | ||
850 | |||
851 | bit6~bit5 00 RW gesture group selection | ||
852 | 00: basic | ||
853 | 01: suite | ||
854 | 10: suite pro | ||
855 | 11: advanced | ||
856 | *supported since Cx | ||
857 | |||
858 | bit7 0 RW Bx packet output compatible mode | ||
859 | 0: disable, 1: enable | ||
860 | *supported since Cx | ||
861 | |||
862 | |||
863 | 0x833d RW on-pad command control 1 | ||
864 | bit7 1 RW on-pad command detection enable | ||
865 | 0: disable, 1: enable | ||
866 | *supported since Cx | ||
867 | |||
868 | 0x833e RW on-pad command detection | ||
869 | bit7 0 RW on-pad command left button down tag | ||
870 | enable. Works only in H/W based PS/2 | ||
871 | data packet mode. | ||
872 | 0: disable, 1: enable | ||
873 | *supported since Cx | ||
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 7a9e0b4b2903..506c7390c2b9 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt | |||
@@ -17,8 +17,8 @@ You can use common commands, such as cp and scp, to copy the | |||
17 | memory image to a dump file on the local disk, or across the network to | 17 | memory image to a dump file on the local disk, or across the network to |
18 | a remote system. | 18 | a remote system. |
19 | 19 | ||
20 | Kdump and kexec are currently supported on the x86, x86_64, ppc64 and ia64 | 20 | Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64, |
21 | architectures. | 21 | and s390x architectures. |
22 | 22 | ||
23 | When the system kernel boots, it reserves a small section of memory for | 23 | When the system kernel boots, it reserves a small section of memory for |
24 | the dump-capture kernel. This ensures that ongoing Direct Memory Access | 24 | the dump-capture kernel. This ensures that ongoing Direct Memory Access |
@@ -34,11 +34,18 @@ Similarly on PPC64 machines first 32KB of physical memory is needed for | |||
34 | booting regardless of where the kernel is loaded and to support 64K page | 34 | booting regardless of where the kernel is loaded and to support 64K page |
35 | size kexec backs up the first 64KB memory. | 35 | size kexec backs up the first 64KB memory. |
36 | 36 | ||
37 | For s390x, when kdump is triggered, the crashkernel region is exchanged | ||
38 | with the region [0, crashkernel region size] and then the kdump kernel | ||
39 | runs in [0, crashkernel region size]. Therefore no relocatable kernel is | ||
40 | needed for s390x. | ||
41 | |||
37 | All of the necessary information about the system kernel's core image is | 42 | All of the necessary information about the system kernel's core image is |
38 | encoded in the ELF format, and stored in a reserved area of memory | 43 | encoded in the ELF format, and stored in a reserved area of memory |
39 | before a crash. The physical address of the start of the ELF header is | 44 | before a crash. The physical address of the start of the ELF header is |
40 | passed to the dump-capture kernel through the elfcorehdr= boot | 45 | passed to the dump-capture kernel through the elfcorehdr= boot |
41 | parameter. | 46 | parameter. Optionally the size of the ELF header can also be passed |
47 | when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax. | ||
48 | |||
42 | 49 | ||
43 | With the dump-capture kernel, you can access the memory image, or "old | 50 | With the dump-capture kernel, you can access the memory image, or "old |
44 | memory," in two ways: | 51 | memory," in two ways: |
@@ -291,6 +298,10 @@ Boot into System Kernel | |||
291 | The region may be automatically placed on ia64, see the | 298 | The region may be automatically placed on ia64, see the |
292 | dump-capture kernel config option notes above. | 299 | dump-capture kernel config option notes above. |
293 | 300 | ||
301 | On s390x, typically use "crashkernel=xxM". The value of xx is dependent | ||
302 | on the memory consumption of the kdump system. In general this is not | ||
303 | dependent on the memory size of the production system. | ||
304 | |||
294 | Load the Dump-capture Kernel | 305 | Load the Dump-capture Kernel |
295 | ============================ | 306 | ============================ |
296 | 307 | ||
@@ -308,6 +319,8 @@ For ppc64: | |||
308 | - Use vmlinux | 319 | - Use vmlinux |
309 | For ia64: | 320 | For ia64: |
310 | - Use vmlinux or vmlinuz.gz | 321 | - Use vmlinux or vmlinuz.gz |
322 | For s390x: | ||
323 | - Use image or bzImage | ||
311 | 324 | ||
312 | 325 | ||
313 | If you are using a uncompressed vmlinux image then use following command | 326 | If you are using a uncompressed vmlinux image then use following command |
@@ -337,6 +350,8 @@ For i386, x86_64 and ia64: | |||
337 | For ppc64: | 350 | For ppc64: |
338 | "1 maxcpus=1 noirqdistrib reset_devices" | 351 | "1 maxcpus=1 noirqdistrib reset_devices" |
339 | 352 | ||
353 | For s390x: | ||
354 | "1 maxcpus=1 cgroup_disable=memory" | ||
340 | 355 | ||
341 | Notes on loading the dump-capture kernel: | 356 | Notes on loading the dump-capture kernel: |
342 | 357 | ||
@@ -362,6 +377,20 @@ Notes on loading the dump-capture kernel: | |||
362 | dump. Hence generally it is useful either to build a UP dump-capture | 377 | dump. Hence generally it is useful either to build a UP dump-capture |
363 | kernel or specify maxcpus=1 option while loading dump-capture kernel. | 378 | kernel or specify maxcpus=1 option while loading dump-capture kernel. |
364 | 379 | ||
380 | * For s390x there are two kdump modes: If an ELF header is specified with | ||
381 | the elfcorehdr= kernel parameter, it is used by the kdump kernel as it | ||
382 | is done on all other architectures. If no elfcorehdr= kernel parameter is | ||
383 | specified, the s390x kdump kernel dynamically creates the header. The | ||
384 | second mode has the advantage that for CPU and memory hotplug, kdump does | ||
385 | not have to be reloaded with kexec_load(). | ||
386 | |||
387 | * For s390x systems with many attached devices the "cio_ignore" kernel | ||
388 | parameter should be used for the kdump kernel in order to prevent allocation | ||
389 | of kernel memory for devices that are not relevant for kdump. The same | ||
390 | applies to systems that use SCSI/FCP devices. In that case the | ||
391 | "allow_lun_scan" zfcp module parameter should be set to zero before | ||
392 | setting FCP devices online. | ||
393 | |||
365 | Kernel Panic | 394 | Kernel Panic |
366 | ============ | 395 | ============ |
367 | 396 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 81c287fad79d..eb93fd0ec734 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
329 | is a lot of faster | 329 | is a lot of faster |
330 | off - do not initialize any AMD IOMMU found in | 330 | off - do not initialize any AMD IOMMU found in |
331 | the system | 331 | the system |
332 | force_isolation - Force device isolation for all | ||
333 | devices. The IOMMU driver is not | ||
334 | allowed anymore to lift isolation | ||
335 | requirements as needed. This option | ||
336 | does not override iommu=pt | ||
332 | 337 | ||
333 | amijoy.map= [HW,JOY] Amiga joystick support | 338 | amijoy.map= [HW,JOY] Amiga joystick support |
334 | Map of devices attached to JOY0DAT and JOY1DAT | 339 | Map of devices attached to JOY0DAT and JOY1DAT |
@@ -623,6 +628,25 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
623 | no_debug_objects | 628 | no_debug_objects |
624 | [KNL] Disable object debugging | 629 | [KNL] Disable object debugging |
625 | 630 | ||
631 | debug_guardpage_minorder= | ||
632 | [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this | ||
633 | parameter allows control of the order of pages that will | ||
634 | be intentionally kept free (and hence protected) by the | ||
635 | buddy allocator. Bigger values increase the probability | ||
636 | of catching random memory corruption, but reduce the | ||
637 | amount of memory for normal system use. The maximum | ||
638 | possible value is MAX_ORDER/2. Setting this parameter | ||
639 | to 1 or 2 should be enough to identify most random | ||
640 | memory corruption problems caused by bugs in kernel or | ||
641 | driver code when a CPU writes to (or reads from) a | ||
642 | random memory location. Note that there exists a class | ||
643 | of memory corruption problems caused by buggy H/W or | ||
644 | F/W or by drivers badly programming DMA (basically when | ||
645 | memory is written at bus level and the CPU MMU is | ||
646 | bypassed) which are not detectable by | ||
647 | CONFIG_DEBUG_PAGEALLOC, hence this option will not help | ||
648 | tracking down these problems. | ||
649 | |||
626 | debugpat [X86] Enable PAT debugging | 650 | debugpat [X86] Enable PAT debugging |
627 | 651 | ||
628 | decnet.addr= [HW,NET] | 652 | decnet.addr= [HW,NET] |
@@ -1059,7 +1083,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1059 | nomerge | 1083 | nomerge |
1060 | forcesac | 1084 | forcesac |
1061 | soft | 1085 | soft |
1062 | pt [x86, IA-64] | 1086 | pt [x86, IA-64] |
1087 | group_mf [x86, IA-64] | ||
1088 | |||
1063 | 1089 | ||
1064 | io7= [HW] IO7 for Marvel based alpha systems | 1090 | io7= [HW] IO7 for Marvel based alpha systems |
1065 | See comment before marvel_specify_io7 in | 1091 | See comment before marvel_specify_io7 in |
@@ -1178,9 +1204,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1178 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | 1204 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. |
1179 | Default is 0 (don't ignore, but inject #GP) | 1205 | Default is 0 (don't ignore, but inject #GP) |
1180 | 1206 | ||
1181 | kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging. | ||
1182 | Default is 1 (enabled) | ||
1183 | |||
1184 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit | 1207 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit |
1185 | KVM MMU at runtime. | 1208 | KVM MMU at runtime. |
1186 | Default is 0 (off) | 1209 | Default is 0 (off) |
@@ -1630,12 +1653,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1630 | The default is to return 64-bit inode numbers. | 1653 | The default is to return 64-bit inode numbers. |
1631 | 1654 | ||
1632 | nfs.nfs4_disable_idmapping= | 1655 | nfs.nfs4_disable_idmapping= |
1633 | [NFSv4] When set, this option disables the NFSv4 | 1656 | [NFSv4] When set to the default of '1', this option |
1634 | idmapper on the client, but only if the mount | 1657 | ensures that both the RPC level authentication |
1635 | is using the 'sec=sys' security flavour. This may | 1658 | scheme and the NFS level operations agree to use |
1636 | make migration from legacy NFSv2/v3 systems easier | 1659 | numeric uids/gids if the mount is using the |
1637 | provided that the server has the appropriate support. | 1660 | 'sec=sys' security flavour. In effect it is |
1638 | The default is to always enable NFSv4 idmapping. | 1661 | disabling idmapping, which can make migration from |
1662 | legacy NFSv2/v3 systems to NFSv4 easier. | ||
1663 | Servers that do not support this mode of operation | ||
1664 | will be autodetected by the client, and it will fall | ||
1665 | back to using the idmapper. | ||
1666 | To turn off this behaviour, set the value to '0'. | ||
1639 | 1667 | ||
1640 | nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take | 1668 | nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take |
1641 | when a NMI is triggered. | 1669 | when a NMI is triggered. |
@@ -1796,6 +1824,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1796 | nomfgpt [X86-32] Disable Multi-Function General Purpose | 1824 | nomfgpt [X86-32] Disable Multi-Function General Purpose |
1797 | Timer usage (for AMD Geode machines). | 1825 | Timer usage (for AMD Geode machines). |
1798 | 1826 | ||
1827 | nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to | ||
1828 | shut down the other cpus. Instead use the REBOOT_VECTOR | ||
1829 | irq. | ||
1830 | |||
1799 | nopat [X86] Disable PAT (page attribute table extension of | 1831 | nopat [X86] Disable PAT (page attribute table extension of |
1800 | pagetables) support. | 1832 | pagetables) support. |
1801 | 1833 | ||
@@ -1885,6 +1917,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1885 | arch_perfmon: [X86] Force use of architectural | 1917 | arch_perfmon: [X86] Force use of architectural |
1886 | perfmon on Intel CPUs instead of the | 1918 | perfmon on Intel CPUs instead of the |
1887 | CPU specific event set. | 1919 | CPU specific event set. |
1920 | timer: [X86] Force use of architectural NMI | ||
1921 | timer mode (see also oprofile.timer | ||
1922 | for generic hr timer mode) | ||
1923 | [s390] Force legacy basic mode sampling | ||
1924 | (report cpu_type "timer") | ||
1888 | 1925 | ||
1889 | oops=panic Always panic on oopses. Default is to just kill the | 1926 | oops=panic Always panic on oopses. Default is to just kill the |
1890 | process, but there is a small probability of | 1927 | process, but there is a small probability of |
@@ -2362,6 +2399,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2362 | 2399 | ||
2363 | slram= [HW,MTD] | 2400 | slram= [HW,MTD] |
2364 | 2401 | ||
2402 | slab_max_order= [MM, SLAB] | ||
2403 | Determines the maximum allowed order for slabs. | ||
2404 | A high setting may cause OOMs due to memory | ||
2405 | fragmentation. Defaults to 1 for systems with | ||
2406 | more than 32MB of RAM, 0 otherwise. | ||
2407 | |||
2365 | slub_debug[=options[,slabs]] [MM, SLUB] | 2408 | slub_debug[=options[,slabs]] [MM, SLUB] |
2366 | Enabling slub_debug allows one to determine the | 2409 | Enabling slub_debug allows one to determine the |
2367 | culprit if slab objects become corrupted. Enabling | 2410 | culprit if slab objects become corrupted. Enabling |
@@ -2632,6 +2675,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2632 | [USB] Start with the old device initialization | 2675 | [USB] Start with the old device initialization |
2633 | scheme (default 0 = off). | 2676 | scheme (default 0 = off). |
2634 | 2677 | ||
2678 | usbcore.usbfs_memory_mb= | ||
2679 | [USB] Memory limit (in MB) for buffers allocated by | ||
2680 | usbfs (default = 16, 0 = max = 2047). | ||
2681 | |||
2635 | usbcore.use_both_schemes= | 2682 | usbcore.use_both_schemes= |
2636 | [USB] Try the other device initialization scheme | 2683 | [USB] Try the other device initialization scheme |
2637 | if the first one fails (default 1 = enabled). | 2684 | if the first one fails (default 1 = enabled). |
@@ -2750,11 +2797,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2750 | functions are at fixed addresses, they make nice | 2797 | functions are at fixed addresses, they make nice |
2751 | targets for exploits that can control RIP. | 2798 | targets for exploits that can control RIP. |
2752 | 2799 | ||
2753 | emulate Vsyscalls turn into traps and are emulated | 2800 | emulate [default] Vsyscalls turn into traps and are |
2754 | reasonably safely. | 2801 | emulated reasonably safely. |
2755 | 2802 | ||
2756 | native [default] Vsyscalls are native syscall | 2803 | native Vsyscalls are native syscall instructions. |
2757 | instructions. | ||
2758 | This is a little bit faster than trapping | 2804 | This is a little bit faster than trapping |
2759 | and makes a few dynamic recompilers work | 2805 | and makes a few dynamic recompilers work |
2760 | better than they would in emulation mode. | 2806 | better than they would in emulation mode. |
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt index abf768c681e2..5dbc99c04f6e 100644 --- a/Documentation/lockdep-design.txt +++ b/Documentation/lockdep-design.txt | |||
@@ -221,3 +221,66 @@ when the chain is validated for the first time, is then put into a hash | |||
221 | table, which hash-table can be checked in a lockfree manner. If the | 221 | table, which hash-table can be checked in a lockfree manner. If the |
222 | locking chain occurs again later on, the hash table tells us that we | 222 | locking chain occurs again later on, the hash table tells us that we |
223 | dont have to validate the chain again. | 223 | dont have to validate the chain again. |
224 | |||
225 | Troubleshooting: | ||
226 | ---------------- | ||
227 | |||
228 | The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. | ||
229 | Exceeding this number will trigger the following lockdep warning: | ||
230 | |||
231 | (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) | ||
232 | |||
233 | By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical | ||
234 | desktop systems have less than 1,000 lock classes, so this warning | ||
235 | normally results from lock-class leakage or failure to properly | ||
236 | initialize locks. These two problems are illustrated below: | ||
237 | |||
238 | 1. Repeated module loading and unloading while running the validator | ||
239 | will result in lock-class leakage. The issue here is that each | ||
240 | load of the module will create a new set of lock classes for | ||
241 | that module's locks, but module unloading does not remove old | ||
242 | classes (see below discussion of reuse of lock classes for why). | ||
243 | Therefore, if that module is loaded and unloaded repeatedly, | ||
244 | the number of lock classes will eventually reach the maximum. | ||
245 | |||
246 | 2. Using structures such as arrays that have large numbers of | ||
247 | locks that are not explicitly initialized. For example, | ||
248 | a hash table with 8192 buckets where each bucket has its own | ||
249 | spinlock_t will consume 8192 lock classes -unless- each spinlock | ||
250 | is explicitly initialized at runtime, for example, using the | ||
251 | run-time spin_lock_init() as opposed to compile-time initializers | ||
252 | such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize | ||
253 | the per-bucket spinlocks would guarantee lock-class overflow. | ||
254 | In contrast, a loop that called spin_lock_init() on each lock | ||
255 | would place all 8192 locks into a single lock class. | ||
256 | |||
257 | The moral of this story is that you should always explicitly | ||
258 | initialize your locks. | ||
259 | |||
260 | One might argue that the validator should be modified to allow | ||
261 | lock classes to be reused. However, if you are tempted to make this | ||
262 | argument, first review the code and think through the changes that would | ||
263 | be required, keeping in mind that the lock classes to be removed are | ||
264 | likely to be linked into the lock-dependency graph. This turns out to | ||
265 | be harder to do than to say. | ||
266 | |||
267 | Of course, if you do run out of lock classes, the next thing to do is | ||
268 | to find the offending lock classes. First, the following command gives | ||
269 | you the number of lock classes currently in use along with the maximum: | ||
270 | |||
271 | grep "lock-classes" /proc/lockdep_stats | ||
272 | |||
273 | This command produces the following output on a modest system: | ||
274 | |||
275 | lock-classes: 748 [max: 8191] | ||
276 | |||
277 | If the number allocated (748 above) increases continually over time, | ||
278 | then there is likely a leak. The following command can be used to | ||
279 | identify the leaking lock classes: | ||
280 | |||
281 | grep "BD" /proc/lockdep | ||
282 | |||
283 | Run the command and save the output, then compare against the output from | ||
284 | a later run of this command to identify the leakers. This same output | ||
285 | can also help you find situations where runtime lock initialization has | ||
286 | been omitted. | ||
diff --git a/Documentation/md.txt b/Documentation/md.txt index fc94770f44ab..993fba37b7d1 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -357,14 +357,14 @@ Each directory contains: | |||
357 | written to, that device. | 357 | written to, that device. |
358 | 358 | ||
359 | state | 359 | state |
360 | A file recording the current state of the device in the array | 360 | A file recording the current state of the device in the array |
361 | which can be a comma separated list of | 361 | which can be a comma separated list of |
362 | faulty - device has been kicked from active use due to | 362 | faulty - device has been kicked from active use due to |
363 | a detected fault or it has unacknowledged bad | 363 | a detected fault, or it has unacknowledged bad |
364 | blocks | 364 | blocks |
365 | in_sync - device is a fully in-sync member of the array | 365 | in_sync - device is a fully in-sync member of the array |
366 | writemostly - device will only be subject to read | 366 | writemostly - device will only be subject to read |
367 | requests if there are no other options. | 367 | requests if there are no other options. |
368 | This applies only to raid1 arrays. | 368 | This applies only to raid1 arrays. |
369 | blocked - device has failed, and the failure hasn't been | 369 | blocked - device has failed, and the failure hasn't been |
370 | acknowledged yet by the metadata handler. | 370 | acknowledged yet by the metadata handler. |
@@ -374,6 +374,13 @@ Each directory contains: | |||
374 | This includes spares that are in the process | 374 | This includes spares that are in the process |
375 | of being recovered to | 375 | of being recovered to |
376 | write_error - device has ever seen a write error. | 376 | write_error - device has ever seen a write error. |
377 | want_replacement - device is (mostly) working but probably | ||
378 | should be replaced, either due to errors or | ||
379 | due to user request. | ||
380 | replacement - device is a replacement for another active | ||
381 | device with same raid_disk. | ||
382 | |||
383 | |||
377 | This list may grow in future. | 384 | This list may grow in future. |
378 | This can be written to. | 385 | This can be written to. |
379 | Writing "faulty" simulates a failure on the device. | 386 | Writing "faulty" simulates a failure on the device. |
@@ -386,6 +393,13 @@ Each directory contains: | |||
386 | Writing "in_sync" sets the in_sync flag. | 393 | Writing "in_sync" sets the in_sync flag. |
387 | Writing "write_error" sets writeerrorseen flag. | 394 | Writing "write_error" sets writeerrorseen flag. |
388 | Writing "-write_error" clears writeerrorseen flag. | 395 | Writing "-write_error" clears writeerrorseen flag. |
396 | Writing "want_replacement" is allowed at any time except to a | ||
397 | replacement device or a spare. It sets the flag. | ||
398 | Writing "-want_replacement" is allowed at any time. It clears | ||
399 | the flag. | ||
400 | Writing "replacement" or "-replacement" is only allowed before | ||
401 | starting the array. It sets or clears the flag. | ||
402 | |||
389 | 403 | ||
390 | This file responds to select/poll. Any change to 'faulty' | 404 | This file responds to select/poll. Any change to 'faulty' |
391 | or 'blocked' causes an event. | 405 | or 'blocked' causes an event. |
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index bbce1215434a..9ad9ddeb384c 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX | |||
@@ -144,6 +144,8 @@ nfc.txt | |||
144 | - The Linux Near Field Communication (NFC) subsystem. | 144 | - The Linux Near Field Communication (NFC) subsystem. |
145 | olympic.txt | 145 | olympic.txt |
146 | - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. | 146 | - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. |
147 | openvswitch.txt | ||
148 | - Open vSwitch developer documentation. | ||
147 | operstates.txt | 149 | operstates.txt |
148 | - Overview of network interface operational states. | 150 | - Overview of network interface operational states. |
149 | packet_mmap.txt | 151 | packet_mmap.txt |
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index c86d03f18a5b..221ad0cdf11f 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt | |||
@@ -200,15 +200,16 @@ abled during run time. Following log_levels are defined: | |||
200 | 200 | ||
201 | 0 - All debug output disabled | 201 | 0 - All debug output disabled |
202 | 1 - Enable messages related to routing / flooding / broadcasting | 202 | 1 - Enable messages related to routing / flooding / broadcasting |
203 | 2 - Enable route or tt entry added / changed / deleted | 203 | 2 - Enable messages related to route added / changed / deleted |
204 | 3 - Enable all messages | 204 | 4 - Enable messages related to translation table operations |
205 | 7 - Enable all messages | ||
205 | 206 | ||
206 | The debug output can be changed at runtime using the file | 207 | The debug output can be changed at runtime using the file |
207 | /sys/class/net/bat0/mesh/log_level. e.g. | 208 | /sys/class/net/bat0/mesh/log_level. e.g. |
208 | 209 | ||
209 | # echo 2 > /sys/class/net/bat0/mesh/log_level | 210 | # echo 2 > /sys/class/net/bat0/mesh/log_level |
210 | 211 | ||
211 | will enable debug messages for when routes or TTs change. | 212 | will enable debug messages for when routes change. |
212 | 213 | ||
213 | 214 | ||
214 | BATCTL | 215 | BATCTL |
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 91df678fb7f8..080ad26690ae 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt | |||
@@ -196,6 +196,23 @@ or, for backwards compatibility, the option value. E.g., | |||
196 | 196 | ||
197 | The parameters are as follows: | 197 | The parameters are as follows: |
198 | 198 | ||
199 | active_slave | ||
200 | |||
201 | Specifies the new active slave for modes that support it | ||
202 | (active-backup, balance-alb and balance-tlb). Possible values | ||
203 | are the name of any currently enslaved interface, or an empty | ||
204 | string. If a name is given, the slave and its link must be up in order | ||
205 | to be selected as the new active slave. If an empty string is | ||
206 | specified, the current active slave is cleared, and a new active | ||
207 | slave is selected automatically. | ||
208 | |||
209 | Note that this is only available through the sysfs interface. No module | ||
210 | parameter by this name exists. | ||
211 | |||
212 | The normal value of this option is the name of the currently | ||
213 | active slave, or the empty string if there is no active slave or | ||
214 | the current mode does not use an active slave. | ||
215 | |||
199 | ad_select | 216 | ad_select |
200 | 217 | ||
201 | Specifies the 802.3ad aggregation selection logic to use. The | 218 | Specifies the 802.3ad aggregation selection logic to use. The |
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index f41ea2405220..1dc1c24a7547 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt | |||
@@ -78,3 +78,30 @@ in software. This is currently WIP. | |||
78 | 78 | ||
79 | See header include/net/mac802154.h and several drivers in drivers/ieee802154/. | 79 | See header include/net/mac802154.h and several drivers in drivers/ieee802154/. |
80 | 80 | ||
81 | 6LoWPAN Linux implementation | ||
82 | ============================ | ||
83 | |||
84 | The IEEE 802.15.4 standard specifies an MTU of 128 bytes, yielding about 80 | ||
85 | octets of actual MAC payload once security is turned on, on a wireless link | ||
86 | with a link throughput of 250 kbps or less. The 6LoWPAN adaptation format | ||
87 | [RFC4944] was specified to carry IPv6 datagrams over such constrained links, | ||
88 | taking into account limited bandwidth, memory, or energy resources that are | ||
89 | expected in applications such as wireless Sensor Networks. [RFC4944] defines | ||
90 | a Mesh Addressing header to support sub-IP forwarding, a Fragmentation header | ||
91 | to support the IPv6 minimum MTU requirement [RFC2460], and stateless header | ||
92 | compression for IPv6 datagrams (LOWPAN_HC1 and LOWPAN_HC2) to reduce the | ||
93 | relatively large IPv6 and UDP headers down to (in the best case) several bytes. | ||
94 | |||
95 | In September 2011 the standard update was published - [RFC6282]. | ||
96 | It deprecates HC1 and HC2 compression and defines IPHC encoding format which is | ||
97 | used in this Linux implementation. | ||
98 | |||
99 | All the code related to 6lowpan you may find in files: net/ieee802154/6lowpan.* | ||
100 | |||
101 | To setup 6lowpan interface you need (busybox release > 1.17.0): | ||
102 | 1. Add IEEE802.15.4 interface and initialize PANid; | ||
103 | 2. Add 6lowpan interface by command like: | ||
104 | # ip link add link wpan0 name lowpan0 type lowpan | ||
105 | 3. Set MAC (if needed): | ||
106 | # ip link set lowpan0 address de:ad:be:ef:ca:fe:ba:be | ||
107 | 4. Bring up 'lowpan0' interface | ||
diff --git a/Documentation/networking/ifenslave.c b/Documentation/networking/ifenslave.c index 65968fbf1e49..ac5debb2f16c 100644 --- a/Documentation/networking/ifenslave.c +++ b/Documentation/networking/ifenslave.c | |||
@@ -539,12 +539,14 @@ static int if_getconfig(char *ifname) | |||
539 | metric = 0; | 539 | metric = 0; |
540 | } else | 540 | } else |
541 | metric = ifr.ifr_metric; | 541 | metric = ifr.ifr_metric; |
542 | printf("The result of SIOCGIFMETRIC is %d\n", metric); | ||
542 | 543 | ||
543 | strcpy(ifr.ifr_name, ifname); | 544 | strcpy(ifr.ifr_name, ifname); |
544 | if (ioctl(skfd, SIOCGIFMTU, &ifr) < 0) | 545 | if (ioctl(skfd, SIOCGIFMTU, &ifr) < 0) |
545 | mtu = 0; | 546 | mtu = 0; |
546 | else | 547 | else |
547 | mtu = ifr.ifr_mtu; | 548 | mtu = ifr.ifr_mtu; |
549 | printf("The result of SIOCGIFMTU is %d\n", mtu); | ||
548 | 550 | ||
549 | strcpy(ifr.ifr_name, ifname); | 551 | strcpy(ifr.ifr_name, ifname); |
550 | if (ioctl(skfd, SIOCGIFDSTADDR, &ifr) < 0) { | 552 | if (ioctl(skfd, SIOCGIFDSTADDR, &ifr) < 0) { |
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 589f2da5d545..ad3e80e17b4f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -31,6 +31,16 @@ neigh/default/gc_thresh3 - INTEGER | |||
31 | when using large numbers of interfaces and when communicating | 31 | when using large numbers of interfaces and when communicating |
32 | with large numbers of directly-connected peers. | 32 | with large numbers of directly-connected peers. |
33 | 33 | ||
34 | neigh/default/unres_qlen_bytes - INTEGER | ||
35 | The maximum number of bytes which may be used by packets | ||
36 | queued for each unresolved address by other network layers. | ||
37 | (added in linux 3.3) | ||
38 | |||
39 | neigh/default/unres_qlen - INTEGER | ||
40 | The maximum number of packets which may be queued for each | ||
41 | unresolved address by other network layers. | ||
42 | (deprecated in linux 3.3) : use unres_qlen_bytes instead. | ||
43 | |||
34 | mtu_expires - INTEGER | 44 | mtu_expires - INTEGER |
35 | Time, in seconds, that cached PMTU information is kept. | 45 | Time, in seconds, that cached PMTU information is kept. |
36 | 46 | ||
@@ -165,6 +175,9 @@ tcp_congestion_control - STRING | |||
165 | connections. The algorithm "reno" is always available, but | 175 | connections. The algorithm "reno" is always available, but |
166 | additional choices may be available based on kernel configuration. | 176 | additional choices may be available based on kernel configuration. |
167 | Default is set as part of kernel configuration. | 177 | Default is set as part of kernel configuration. |
178 | For passive connections, the listener congestion control choice | ||
179 | is inherited. | ||
180 | [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ] | ||
168 | 181 | ||
169 | tcp_cookie_size - INTEGER | 182 | tcp_cookie_size - INTEGER |
170 | Default size of TCP Cookie Transactions (TCPCT) option, that may be | 183 | Default size of TCP Cookie Transactions (TCPCT) option, that may be |
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt new file mode 100644 index 000000000000..b8a048b8df3a --- /dev/null +++ b/Documentation/networking/openvswitch.txt | |||
@@ -0,0 +1,195 @@ | |||
1 | Open vSwitch datapath developer documentation | ||
2 | ============================================= | ||
3 | |||
4 | The Open vSwitch kernel module allows flexible userspace control over | ||
5 | flow-level packet processing on selected network devices. It can be | ||
6 | used to implement a plain Ethernet switch, network device bonding, | ||
7 | VLAN processing, network access control, flow-based network control, | ||
8 | and so on. | ||
9 | |||
10 | The kernel module implements multiple "datapaths" (analogous to | ||
11 | bridges), each of which can have multiple "vports" (analogous to ports | ||
12 | within a bridge). Each datapath also has associated with it a "flow | ||
13 | table" that userspace populates with "flows" that map from keys based | ||
14 | on packet headers and metadata to sets of actions. The most common | ||
15 | action forwards the packet to another vport; other actions are also | ||
16 | implemented. | ||
17 | |||
18 | When a packet arrives on a vport, the kernel module processes it by | ||
19 | extracting its flow key and looking it up in the flow table. If there | ||
20 | is a matching flow, it executes the associated actions. If there is | ||
21 | no match, it queues the packet to userspace for processing (as part of | ||
22 | its processing, userspace will likely set up a flow to handle further | ||
23 | packets of the same type entirely in-kernel). | ||
24 | |||
25 | |||
26 | Flow key compatibility | ||
27 | ---------------------- | ||
28 | |||
29 | Network protocols evolve over time. New protocols become important | ||
30 | and existing protocols lose their prominence. For the Open vSwitch | ||
31 | kernel module to remain relevant, it must be possible for newer | ||
32 | versions to parse additional protocols as part of the flow key. It | ||
33 | might even be desirable, someday, to drop support for parsing | ||
34 | protocols that have become obsolete. Therefore, the Netlink interface | ||
35 | to Open vSwitch is designed to allow carefully written userspace | ||
36 | applications to work with any version of the flow key, past or future. | ||
37 | |||
38 | To support this forward and backward compatibility, whenever the | ||
39 | kernel module passes a packet to userspace, it also passes along the | ||
40 | flow key that it parsed from the packet. Userspace then extracts its | ||
41 | own notion of a flow key from the packet and compares it against the | ||
42 | kernel-provided version: | ||
43 | |||
44 | - If userspace's notion of the flow key for the packet matches the | ||
45 | kernel's, then nothing special is necessary. | ||
46 | |||
47 | - If the kernel's flow key includes more fields than the userspace | ||
48 | version of the flow key, for example if the kernel decoded IPv6 | ||
49 | headers but userspace stopped at the Ethernet type (because it | ||
50 | does not understand IPv6), then again nothing special is | ||
51 | necessary. Userspace can still set up a flow in the usual way, | ||
52 | as long as it uses the kernel-provided flow key to do it. | ||
53 | |||
54 | - If the userspace flow key includes more fields than the | ||
55 | kernel's, for example if userspace decoded an IPv6 header but | ||
56 | the kernel stopped at the Ethernet type, then userspace can | ||
57 | forward the packet manually, without setting up a flow in the | ||
58 | kernel. This case is bad for performance because every packet | ||
59 | that the kernel considers part of the flow must go to userspace, | ||
60 | but the forwarding behavior is correct. (If userspace can | ||
61 | determine that the values of the extra fields would not affect | ||
62 | forwarding behavior, then it could set up a flow anyway.) | ||
63 | |||
64 | How flow keys evolve over time is important to making this work, so | ||
65 | the following sections go into detail. | ||
66 | |||
67 | |||
68 | Flow key format | ||
69 | --------------- | ||
70 | |||
71 | A flow key is passed over a Netlink socket as a sequence of Netlink | ||
72 | attributes. Some attributes represent packet metadata, defined as any | ||
73 | information about a packet that cannot be extracted from the packet | ||
74 | itself, e.g. the vport on which the packet was received. Most | ||
75 | attributes, however, are extracted from headers within the packet, | ||
76 | e.g. source and destination addresses from Ethernet, IP, or TCP | ||
77 | headers. | ||
78 | |||
79 | The <linux/openvswitch.h> header file defines the exact format of the | ||
80 | flow key attributes. For informal explanatory purposes here, we write | ||
81 | them as comma-separated strings, with parentheses indicating arguments | ||
82 | and nesting. For example, the following could represent a flow key | ||
83 | corresponding to a TCP packet that arrived on vport 1: | ||
84 | |||
85 | in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4), | ||
86 | eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=6, tos=0, | ||
87 | frag=no), tcp(src=49163, dst=80) | ||
88 | |||
89 | Often we ellipsize arguments not important to the discussion, e.g.: | ||
90 | |||
91 | in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...) | ||
92 | |||
93 | |||
94 | Basic rule for evolving flow keys | ||
95 | --------------------------------- | ||
96 | |||
97 | Some care is needed to really maintain forward and backward | ||
98 | compatibility for applications that follow the rules listed under | ||
99 | "Flow key compatibility" above. | ||
100 | |||
101 | The basic rule is obvious: | ||
102 | |||
103 | ------------------------------------------------------------------ | ||
104 | New network protocol support must only supplement existing flow | ||
105 | key attributes. It must not change the meaning of already defined | ||
106 | flow key attributes. | ||
107 | ------------------------------------------------------------------ | ||
108 | |||
109 | This rule does have less-obvious consequences so it is worth working | ||
110 | through a few examples. Suppose, for example, that the kernel module | ||
111 | did not already implement VLAN parsing. Instead, it just interpreted | ||
112 | the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the | ||
113 | packet. The flow key for any packet with an 802.1Q header would look | ||
114 | essentially like this, ignoring metadata: | ||
115 | |||
116 | eth(...), eth_type(0x8100) | ||
117 | |||
118 | Naively, to add VLAN support, it makes sense to add a new "vlan" flow | ||
119 | key attribute to contain the VLAN tag, then continue to decode the | ||
120 | encapsulated headers beyond the VLAN tag using the existing field | ||
121 | definitions. With this change, a TCP packet in VLAN 10 would have a | ||
122 | flow key much like this: | ||
123 | |||
124 | eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...) | ||
125 | |||
126 | But this change would negatively affect a userspace application that | ||
127 | has not been updated to understand the new "vlan" flow key attribute. | ||
128 | The application could, following the flow compatibility rules above, | ||
129 | ignore the "vlan" attribute that it does not understand and therefore | ||
130 | assume that the flow contained IP packets. This is a bad assumption | ||
131 | (the flow only contains IP packets if one parses and skips over the | ||
132 | 802.1Q header) and it could cause the application's behavior to change | ||
133 | across kernel versions even though it follows the compatibility rules. | ||
134 | |||
135 | The solution is to use a set of nested attributes. This is, for | ||
136 | example, why 802.1Q support uses nested attributes. A TCP packet in | ||
137 | VLAN 10 is actually expressed as: | ||
138 | |||
139 | eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800), | ||
140 | ip(proto=6, ...), tcp(...)) | ||
141 | |||
142 | Notice how the "eth_type", "ip", and "tcp" flow key attributes are | ||
143 | nested inside the "encap" attribute. Thus, an application that does | ||
144 | not understand the "vlan" key will not see any of those attributes | ||
145 | and therefore will not misinterpret them. (Also, the outer eth_type | ||
146 | is still 0x8100, not changed to 0x0800.) | ||
147 | |||
148 | Handling malformed packets | ||
149 | -------------------------- | ||
150 | |||
151 | Don't drop packets in the kernel for malformed protocol headers, bad | ||
152 | checksums, etc. This would prevent userspace from implementing a | ||
153 | simple Ethernet switch that forwards every packet. | ||
154 | |||
155 | Instead, in such a case, include an attribute with "empty" content. | ||
156 | It doesn't matter if the empty content could be valid protocol values, | ||
157 | as long as those values are rarely seen in practice, because userspace | ||
158 | can always forward all packets with those values to userspace and | ||
159 | handle them individually. | ||
160 | |||
161 | For example, consider a packet that contains an IP header that | ||
162 | indicates protocol 6 for TCP, but which is truncated just after the IP | ||
163 | header, so that the TCP header is missing. The flow key for this | ||
164 | packet would include a tcp attribute with all-zero src and dst, like | ||
165 | this: | ||
166 | |||
167 | eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0) | ||
168 | |||
169 | As another example, consider a packet with an Ethernet type of 0x8100, | ||
170 | indicating that a VLAN TCI should follow, but which is truncated just | ||
171 | after the Ethernet type. The flow key for this packet would include | ||
172 | an all-zero-bits vlan and an empty encap attribute, like this: | ||
173 | |||
174 | eth(...), eth_type(0x8100), vlan(0), encap() | ||
175 | |||
176 | Unlike a TCP packet with source and destination ports 0, an | ||
177 | all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka | ||
178 | VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan | ||
179 | attribute expressly to allow this situation to be distinguished. | ||
180 | Thus, the flow key in this second example unambiguously indicates a | ||
181 | missing or malformed VLAN TCI. | ||
182 | |||
183 | Other rules | ||
184 | ----------- | ||
185 | |||
186 | The other rules for flow keys are much less subtle: | ||
187 | |||
188 | - Duplicate attributes are not allowed at a given nesting level. | ||
189 | |||
190 | - Ordering of attributes is not significant. | ||
191 | |||
192 | - When the kernel sends a given flow key to userspace, it always | ||
193 | composes it the same way. This allows userspace to hash and | ||
194 | compare entire flow keys that it may not be able to fully | ||
195 | interpret. | ||
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 4acea6603720..1c08a4b0981f 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt | |||
@@ -155,7 +155,7 @@ As capture, each frame contains two parts: | |||
155 | 155 | ||
156 | /* fill sockaddr_ll struct to prepare binding */ | 156 | /* fill sockaddr_ll struct to prepare binding */ |
157 | my_addr.sll_family = AF_PACKET; | 157 | my_addr.sll_family = AF_PACKET; |
158 | my_addr.sll_protocol = ETH_P_ALL; | 158 | my_addr.sll_protocol = htons(ETH_P_ALL); |
159 | my_addr.sll_ifindex = s_ifr.ifr_ifindex; | 159 | my_addr.sll_ifindex = s_ifr.ifr_ifindex; |
160 | 160 | ||
161 | /* bind socket to eth0 */ | 161 | /* bind socket to eth0 */ |
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt index a177de21d28e..579994afbe06 100644 --- a/Documentation/networking/scaling.txt +++ b/Documentation/networking/scaling.txt | |||
@@ -208,7 +208,7 @@ The counter in rps_dev_flow_table values records the length of the current | |||
208 | CPU's backlog when a packet in this flow was last enqueued. Each backlog | 208 | CPU's backlog when a packet in this flow was last enqueued. Each backlog |
209 | queue has a head counter that is incremented on dequeue. A tail counter | 209 | queue has a head counter that is incremented on dequeue. A tail counter |
210 | is computed as head counter + queue length. In other words, the counter | 210 | is computed as head counter + queue length. In other words, the counter |
211 | in rps_dev_flow_table[i] records the last element in flow i that has | 211 | in rps_dev_flow[i] records the last element in flow i that has |
212 | been enqueued onto the currently designated CPU for flow i (of course, | 212 | been enqueued onto the currently designated CPU for flow i (of course, |
213 | entry i is actually selected by hash and multiple flows may hash to the | 213 | entry i is actually selected by hash and multiple flows may hash to the |
214 | same entry i). | 214 | same entry i). |
@@ -224,7 +224,7 @@ following is true: | |||
224 | 224 | ||
225 | - The current CPU's queue head counter >= the recorded tail counter | 225 | - The current CPU's queue head counter >= the recorded tail counter |
226 | value in rps_dev_flow[i] | 226 | value in rps_dev_flow[i] |
227 | - The current CPU is unset (equal to NR_CPUS) | 227 | - The current CPU is unset (equal to RPS_NO_CPU) |
228 | - The current CPU is offline | 228 | - The current CPU is offline |
229 | 229 | ||
230 | After this check, the packet is sent to the (possibly updated) current | 230 | After this check, the packet is sent to the (possibly updated) current |
@@ -235,7 +235,7 @@ CPU. | |||
235 | 235 | ||
236 | ==== RFS Configuration | 236 | ==== RFS Configuration |
237 | 237 | ||
238 | RFS is only available if the kconfig symbol CONFIG_RFS is enabled (on | 238 | RFS is only available if the kconfig symbol CONFIG_RPS is enabled (on |
239 | by default for SMP). The functionality remains disabled until explicitly | 239 | by default for SMP). The functionality remains disabled until explicitly |
240 | configured. The number of entries in the global flow table is set through: | 240 | configured. The number of entries in the global flow table is set through: |
241 | 241 | ||
@@ -258,7 +258,7 @@ For a single queue device, the rps_flow_cnt value for the single queue | |||
258 | would normally be configured to the same value as rps_sock_flow_entries. | 258 | would normally be configured to the same value as rps_sock_flow_entries. |
259 | For a multi-queue device, the rps_flow_cnt for each queue might be | 259 | For a multi-queue device, the rps_flow_cnt for each queue might be |
260 | configured as rps_sock_flow_entries / N, where N is the number of | 260 | configured as rps_sock_flow_entries / N, where N is the number of |
261 | queues. So for instance, if rps_flow_entries is set to 32768 and there | 261 | queues. So for instance, if rps_sock_flow_entries is set to 32768 and there |
262 | are 16 configured receive queues, rps_flow_cnt for each queue might be | 262 | are 16 configured receive queues, rps_flow_cnt for each queue might be |
263 | configured as 2048. | 263 | configured as 2048. |
264 | 264 | ||
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 8d67980fabe8..d0aeeadd264b 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt | |||
@@ -4,14 +4,16 @@ Copyright (C) 2007-2010 STMicroelectronics Ltd | |||
4 | Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> | 4 | Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> |
5 | 5 | ||
6 | This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers | 6 | This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers |
7 | (Synopsys IP blocks); it has been fully tested on STLinux platforms. | 7 | (Synopsys IP blocks). |
8 | 8 | ||
9 | Currently this network device driver is for all STM embedded MAC/GMAC | 9 | Currently this network device driver is for all STM embedded MAC/GMAC |
10 | (i.e. 7xxx/5xxx SoCs) and it's known working on other platforms i.e. ARM SPEAr. | 10 | (i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XILINX XC2V3000 |
11 | FF1152AMT0221 D1215994A VIRTEX FPGA board. | ||
11 | 12 | ||
12 | DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100 | 13 | DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether MAC 10/100 |
13 | Universal version 4.0 have been used for developing the first code | 14 | Universal version 4.0 have been used for developing this driver. |
14 | implementation. | 15 | |
16 | This driver supports both the platform bus and PCI. | ||
15 | 17 | ||
16 | Please, for more information also visit: www.stlinux.com | 18 | Please, for more information also visit: www.stlinux.com |
17 | 19 | ||
@@ -277,5 +279,5 @@ In fact, these can generate an huge amount of debug messages. | |||
277 | 279 | ||
278 | 6) TODO: | 280 | 6) TODO: |
279 | o XGMAC is not supported. | 281 | o XGMAC is not supported. |
280 | o Review the timer optimisation code to use an embedded device that will be | 282 | o Add the EEE - Energy Efficient Ethernet |
281 | available in new chip generations. | 283 | o Add the PTP - precision time protocol |
diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt new file mode 100644 index 000000000000..5a013686b9ea --- /dev/null +++ b/Documentation/networking/team.txt | |||
@@ -0,0 +1,2 @@ | |||
1 | Team devices are driven from userspace via libteam library which is here: | ||
2 | https://github.com/jpirko/libteam | ||
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index b04cb7d45a16..6727b92bc2fb 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt | |||
@@ -7,12 +7,9 @@ This subsystem deals with: | |||
7 | 7 | ||
8 | - Multiplexing of pins, pads, fingers (etc) see below for details | 8 | - Multiplexing of pins, pads, fingers (etc) see below for details |
9 | 9 | ||
10 | The intention is to also deal with: | 10 | - Configuration of pins, pads, fingers (etc), such as software-controlled |
11 | 11 | biasing and driving mode specific pins, such as pull-up/down, open drain, | |
12 | - Software-controlled biasing and driving mode specific pins, such as | 12 | load capacitance etc. |
13 | pull-up/down, open drain etc, load capacitance configuration when controlled | ||
14 | by software, etc. | ||
15 | |||
16 | 13 | ||
17 | Top-level interface | 14 | Top-level interface |
18 | =================== | 15 | =================== |
@@ -32,7 +29,7 @@ Definition of PIN: | |||
32 | be sparse - i.e. there may be gaps in the space with numbers where no | 29 | be sparse - i.e. there may be gaps in the space with numbers where no |
33 | pin exists. | 30 | pin exists. |
34 | 31 | ||
35 | When a PIN CONTROLLER is instatiated, it will register a descriptor to the | 32 | When a PIN CONTROLLER is instantiated, it will register a descriptor to the |
36 | pin control framework, and this descriptor contains an array of pin descriptors | 33 | pin control framework, and this descriptor contains an array of pin descriptors |
37 | describing the pins handled by this specific pin controller. | 34 | describing the pins handled by this specific pin controller. |
38 | 35 | ||
@@ -61,14 +58,14 @@ this in our driver: | |||
61 | 58 | ||
62 | #include <linux/pinctrl/pinctrl.h> | 59 | #include <linux/pinctrl/pinctrl.h> |
63 | 60 | ||
64 | const struct pinctrl_pin_desc __refdata foo_pins[] = { | 61 | const struct pinctrl_pin_desc foo_pins[] = { |
65 | PINCTRL_PIN(0, "A1"), | 62 | PINCTRL_PIN(0, "A8"), |
66 | PINCTRL_PIN(1, "A2"), | 63 | PINCTRL_PIN(1, "B8"), |
67 | PINCTRL_PIN(2, "A3"), | 64 | PINCTRL_PIN(2, "C8"), |
68 | ... | 65 | ... |
69 | PINCTRL_PIN(61, "H6"), | 66 | PINCTRL_PIN(61, "F1"), |
70 | PINCTRL_PIN(62, "H7"), | 67 | PINCTRL_PIN(62, "G1"), |
71 | PINCTRL_PIN(63, "H8"), | 68 | PINCTRL_PIN(63, "H1"), |
72 | }; | 69 | }; |
73 | 70 | ||
74 | static struct pinctrl_desc foo_desc = { | 71 | static struct pinctrl_desc foo_desc = { |
@@ -88,11 +85,16 @@ int __init foo_probe(void) | |||
88 | pr_err("could not register foo pin driver\n"); | 85 | pr_err("could not register foo pin driver\n"); |
89 | } | 86 | } |
90 | 87 | ||
88 | To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and | ||
89 | selected drivers, you need to select them from your machine's Kconfig entry, | ||
90 | since these are so tightly integrated with the machines they are used on. | ||
91 | See arch/arm/mach-u300/Kconfig for an example. | ||
92 | |||
91 | Pins usually have fancier names than this. You can find these in the datasheet | 93 | Pins usually have fancier names than this. You can find these in the datasheet |
92 | for your chip. Notice that the core pinctrl.h file provides a fancy macro | 94 | for your chip. Notice that the core pinctrl.h file provides a fancy macro |
93 | called PINCTRL_PIN() to create the struct entries. As you can see I enumerated | 95 | called PINCTRL_PIN() to create the struct entries. As you can see I enumerated |
94 | the pins from 0 in the upper left corner to 63 in the lower right corner, | 96 | the pins from 0 in the upper left corner to 63 in the lower right corner. |
95 | this enumeration was arbitrarily chosen, in practice you need to think | 97 | This enumeration was arbitrarily chosen, in practice you need to think |
96 | through your numbering system so that it matches the layout of registers | 98 | through your numbering system so that it matches the layout of registers |
97 | and such things in your driver, or the code may become complicated. You must | 99 | and such things in your driver, or the code may become complicated. You must |
98 | also consider matching of offsets to the GPIO ranges that may be handled by | 100 | also consider matching of offsets to the GPIO ranges that may be handled by |
@@ -133,8 +135,8 @@ struct foo_group { | |||
133 | const unsigned num_pins; | 135 | const unsigned num_pins; |
134 | }; | 136 | }; |
135 | 137 | ||
136 | static unsigned int spi0_pins[] = { 0, 8, 16, 24 }; | 138 | static const unsigned int spi0_pins[] = { 0, 8, 16, 24 }; |
137 | static unsigned int i2c0_pins[] = { 24, 25 }; | 139 | static const unsigned int i2c0_pins[] = { 24, 25 }; |
138 | 140 | ||
139 | static const struct foo_group foo_groups[] = { | 141 | static const struct foo_group foo_groups[] = { |
140 | { | 142 | { |
@@ -193,6 +195,88 @@ structure, for example specific register ranges associated with each group | |||
193 | and so on. | 195 | and so on. |
194 | 196 | ||
195 | 197 | ||
198 | Pin configuration | ||
199 | ================= | ||
200 | |||
201 | Pins can sometimes be software-configured in various ways, mostly related | ||
202 | to their electronic properties when used as inputs or outputs. For example you | ||
203 | may be able to make an output pin high impedance, or "tristate" meaning it is | ||
204 | effectively disconnected. You may be able to connect an input pin to VDD or GND | ||
205 | using a certain resistor value - pull up and pull down - so that the pin has a | ||
206 | stable value when nothing is driving the rail it is connected to, or when it's | ||
207 | unconnected. | ||
208 | |||
209 | For example, a platform may do this: | ||
210 | |||
211 | ret = pin_config_set("foo-dev", "FOO_GPIO_PIN", PLATFORM_X_PULL_UP); | ||
212 | |||
213 | To pull up a pin to VDD. The pin configuration driver implements callbacks for | ||
214 | changing pin configuration in the pin controller ops like this: | ||
215 | |||
216 | #include <linux/pinctrl/pinctrl.h> | ||
217 | #include <linux/pinctrl/pinconf.h> | ||
218 | #include "platform_x_pindefs.h" | ||
219 | |||
220 | static int foo_pin_config_get(struct pinctrl_dev *pctldev, | ||
221 | unsigned offset, | ||
222 | unsigned long *config) | ||
223 | { | ||
224 | struct my_conftype conf; | ||
225 | |||
226 | ... Find setting for pin @ offset ... | ||
227 | |||
228 | *config = (unsigned long) conf; | ||
229 | } | ||
230 | |||
231 | static int foo_pin_config_set(struct pinctrl_dev *pctldev, | ||
232 | unsigned offset, | ||
233 | unsigned long config) | ||
234 | { | ||
235 | struct my_conftype *conf = (struct my_conftype *) config; | ||
236 | |||
237 | switch (conf) { | ||
238 | case PLATFORM_X_PULL_UP: | ||
239 | ... | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | static int foo_pin_config_group_get (struct pinctrl_dev *pctldev, | ||
245 | unsigned selector, | ||
246 | unsigned long *config) | ||
247 | { | ||
248 | ... | ||
249 | } | ||
250 | |||
251 | static int foo_pin_config_group_set (struct pinctrl_dev *pctldev, | ||
252 | unsigned selector, | ||
253 | unsigned long config) | ||
254 | { | ||
255 | ... | ||
256 | } | ||
257 | |||
258 | static struct pinconf_ops foo_pconf_ops = { | ||
259 | .pin_config_get = foo_pin_config_get, | ||
260 | .pin_config_set = foo_pin_config_set, | ||
261 | .pin_config_group_get = foo_pin_config_group_get, | ||
262 | .pin_config_group_set = foo_pin_config_group_set, | ||
263 | }; | ||
264 | |||
265 | /* Pin config operations are handled by some pin controller */ | ||
266 | static struct pinctrl_desc foo_desc = { | ||
267 | ... | ||
268 | .confops = &foo_pconf_ops, | ||
269 | }; | ||
270 | |||
271 | Since some controllers have special logic for handling entire groups of pins | ||
272 | they can exploit the special whole-group pin control function. The | ||
273 | pin_config_group_set() callback is allowed to return the error code -EAGAIN, | ||
274 | for groups it does not want to handle, or if it just wants to do some | ||
275 | group-level handling and then fall through to iterate over all pins, in which | ||
276 | case each individual pin will be treated by separate pin_config_set() calls as | ||
277 | well. | ||
278 | |||
279 | |||
196 | Interaction with the GPIO subsystem | 280 | Interaction with the GPIO subsystem |
197 | =================================== | 281 | =================================== |
198 | 282 | ||
@@ -214,19 +298,20 @@ static struct pinctrl_gpio_range gpio_range_a = { | |||
214 | .name = "chip a", | 298 | .name = "chip a", |
215 | .id = 0, | 299 | .id = 0, |
216 | .base = 32, | 300 | .base = 32, |
301 | .pin_base = 32, | ||
217 | .npins = 16, | 302 | .npins = 16, |
218 | .gc = &chip_a; | 303 | .gc = &chip_a; |
219 | }; | 304 | }; |
220 | 305 | ||
221 | static struct pinctrl_gpio_range gpio_range_a = { | 306 | static struct pinctrl_gpio_range gpio_range_b = { |
222 | .name = "chip b", | 307 | .name = "chip b", |
223 | .id = 0, | 308 | .id = 0, |
224 | .base = 48, | 309 | .base = 48, |
310 | .pin_base = 64, | ||
225 | .npins = 8, | 311 | .npins = 8, |
226 | .gc = &chip_b; | 312 | .gc = &chip_b; |
227 | }; | 313 | }; |
228 | 314 | ||
229 | |||
230 | { | 315 | { |
231 | struct pinctrl_dev *pctl; | 316 | struct pinctrl_dev *pctl; |
232 | ... | 317 | ... |
@@ -235,42 +320,39 @@ static struct pinctrl_gpio_range gpio_range_a = { | |||
235 | } | 320 | } |
236 | 321 | ||
237 | So this complex system has one pin controller handling two different | 322 | So this complex system has one pin controller handling two different |
238 | GPIO chips. Chip a has 16 pins and chip b has 8 pins. They are mapped in | 323 | GPIO chips. "chip a" has 16 pins and "chip b" has 8 pins. The "chip a" and |
239 | the global GPIO pin space at: | 324 | "chip b" have different .pin_base, which means a start pin number of the |
325 | GPIO range. | ||
326 | |||
327 | The GPIO range of "chip a" starts from the GPIO base of 32 and actual | ||
328 | pin range also starts from 32. However "chip b" has different starting | ||
329 | offset for the GPIO range and pin range. The GPIO range of "chip b" starts | ||
330 | from GPIO number 48, while the pin range of "chip b" starts from 64. | ||
331 | |||
332 | We can convert a gpio number to actual pin number using this "pin_base". | ||
333 | They are mapped in the global GPIO pin space at: | ||
240 | 334 | ||
241 | chip a: [32 .. 47] | 335 | chip a: |
242 | chip b: [48 .. 55] | 336 | - GPIO range : [32 .. 47] |
337 | - pin range : [32 .. 47] | ||
338 | chip b: | ||
339 | - GPIO range : [48 .. 55] | ||
340 | - pin range : [64 .. 71] | ||
243 | 341 | ||
244 | When GPIO-specific functions in the pin control subsystem are called, these | 342 | When GPIO-specific functions in the pin control subsystem are called, these |
245 | ranges will be used to look up the apropriate pin controller by inspecting | 343 | ranges will be used to look up the appropriate pin controller by inspecting |
246 | and matching the pin to the pin ranges across all controllers. When a | 344 | and matching the pin to the pin ranges across all controllers. When a |
247 | pin controller handling the matching range is found, GPIO-specific functions | 345 | pin controller handling the matching range is found, GPIO-specific functions |
248 | will be called on that specific pin controller. | 346 | will be called on that specific pin controller. |
249 | 347 | ||
250 | For all functionalities dealing with pin biasing, pin muxing etc, the pin | 348 | For all functionalities dealing with pin biasing, pin muxing etc, the pin |
251 | controller subsystem will subtract the range's .base offset from the passed | 349 | controller subsystem will subtract the range's .base offset from the passed |
252 | in gpio pin number, and pass that on to the pin control driver, so the driver | 350 | in gpio number, and add the range's .pin_base offset to retrieve a pin number. |
253 | will get an offset into its handled number range. Further it is also passed | 351 | After that, the subsystem passes it on to the pin control driver, so the driver |
352 | will get a pin number into its handled number range. Further it is also passed | ||
254 | the range ID value, so that the pin controller knows which range it should | 353 | the range ID value, so that the pin controller knows which range it should |
255 | deal with. | 354 | deal with. |
256 | 355 | ||
257 | For example: if a user issues pinctrl_gpio_set_foo(50), the pin control | ||
258 | subsystem will find that the second range on this pin controller matches, | ||
259 | subtract the base 48 and call the | ||
260 | pinctrl_driver_gpio_set_foo(pinctrl, range, 2) where the latter function has | ||
261 | this signature: | ||
262 | |||
263 | int pinctrl_driver_gpio_set_foo(struct pinctrl_dev *pctldev, | ||
264 | struct pinctrl_gpio_range *rangeid, | ||
265 | unsigned offset); | ||
266 | |||
267 | Now the driver knows that we want to do some GPIO-specific operation on the | ||
268 | second GPIO range handled by "chip b", at offset 2 in that specific range. | ||
269 | |||
270 | (If the GPIO subsystem is ever refactored to use a local per-GPIO controller | ||
271 | pin space, this mapping will need to be augmented accordingly.) | ||
272 | |||
273 | |||
274 | PINMUX interfaces | 356 | PINMUX interfaces |
275 | ================= | 357 | ================= |
276 | 358 | ||
@@ -438,7 +520,7 @@ you. Define enumerators only for the pins you can control if that makes sense. | |||
438 | 520 | ||
439 | Assumptions: | 521 | Assumptions: |
440 | 522 | ||
441 | We assume that the number possible function maps to pin groups is limited by | 523 | We assume that the number of possible function maps to pin groups is limited by |
442 | the hardware. I.e. we assume that there is no system where any function can be | 524 | the hardware. I.e. we assume that there is no system where any function can be |
443 | mapped to any pin, like in a phone exchange. So the available pins groups for | 525 | mapped to any pin, like in a phone exchange. So the available pins groups for |
444 | a certain function will be limited to a few choices (say up to eight or so), | 526 | a certain function will be limited to a few choices (say up to eight or so), |
@@ -585,7 +667,7 @@ int foo_list_funcs(struct pinctrl_dev *pctldev, unsigned selector) | |||
585 | 667 | ||
586 | const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned selector) | 668 | const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned selector) |
587 | { | 669 | { |
588 | return myfuncs[selector].name; | 670 | return foo_functions[selector].name; |
589 | } | 671 | } |
590 | 672 | ||
591 | static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, | 673 | static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, |
@@ -600,16 +682,16 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, | |||
600 | int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, | 682 | int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, |
601 | unsigned group) | 683 | unsigned group) |
602 | { | 684 | { |
603 | u8 regbit = (1 << group); | 685 | u8 regbit = (1 << selector + group); |
604 | 686 | ||
605 | writeb((readb(MUX)|regbit), MUX) | 687 | writeb((readb(MUX)|regbit), MUX) |
606 | return 0; | 688 | return 0; |
607 | } | 689 | } |
608 | 690 | ||
609 | int foo_disable(struct pinctrl_dev *pctldev, unsigned selector, | 691 | void foo_disable(struct pinctrl_dev *pctldev, unsigned selector, |
610 | unsigned group) | 692 | unsigned group) |
611 | { | 693 | { |
612 | u8 regbit = (1 << group); | 694 | u8 regbit = (1 << selector + group); |
613 | 695 | ||
614 | writeb((readb(MUX) & ~(regbit)), MUX) | 696 | writeb((readb(MUX) & ~(regbit)), MUX) |
615 | return 0; | 697 | return 0; |
@@ -647,6 +729,17 @@ All the above functions are mandatory to implement for a pinmux driver. | |||
647 | Pinmux interaction with the GPIO subsystem | 729 | Pinmux interaction with the GPIO subsystem |
648 | ========================================== | 730 | ========================================== |
649 | 731 | ||
732 | The public pinmux API contains two functions named pinmux_request_gpio() | ||
733 | and pinmux_free_gpio(). These two functions shall *ONLY* be called from | ||
734 | gpiolib-based drivers as part of their gpio_request() and | ||
735 | gpio_free() semantics. Likewise the pinmux_gpio_direction_[input|output] | ||
736 | shall only be called from within respective gpio_direction_[input|output] | ||
737 | gpiolib implementation. | ||
738 | |||
739 | NOTE that platforms and individual drivers shall *NOT* request GPIO pins to be | ||
740 | muxed in. Instead, implement a proper gpiolib driver and have that driver | ||
741 | request proper muxing for its pins. | ||
742 | |||
650 | The function list could become long, especially if you can convert every | 743 | The function list could become long, especially if you can convert every |
651 | individual pin into a GPIO pin independent of any other pins, and then try | 744 | individual pin into a GPIO pin independent of any other pins, and then try |
652 | the approach to define every pin as a function. | 745 | the approach to define every pin as a function. |
@@ -654,19 +747,24 @@ the approach to define every pin as a function. | |||
654 | In this case, the function array would become 64 entries for each GPIO | 747 | In this case, the function array would become 64 entries for each GPIO |
655 | setting and then the device functions. | 748 | setting and then the device functions. |
656 | 749 | ||
657 | For this reason there is an additional function a pinmux driver can implement | 750 | For this reason there are two functions a pinmux driver can implement |
658 | to enable only GPIO on an individual pin: .gpio_request_enable(). The same | 751 | to enable only GPIO on an individual pin: .gpio_request_enable() and |
659 | .free() function as for other functions is assumed to be usable also for | 752 | .gpio_disable_free(). |
660 | GPIO pins. | ||
661 | 753 | ||
662 | This function will pass in the affected GPIO range identified by the pin | 754 | This function will pass in the affected GPIO range identified by the pin |
663 | controller core, so you know which GPIO pins are being affected by the request | 755 | controller core, so you know which GPIO pins are being affected by the request |
664 | operation. | 756 | operation. |
665 | 757 | ||
666 | Alternatively it is fully allowed to use named functions for each GPIO | 758 | If your driver needs to have an indication from the framework of whether the |
667 | pin, the pinmux_request_gpio() will attempt to obtain the function "gpioN" | 759 | GPIO pin shall be used for input or output you can implement the |
668 | where "N" is the global GPIO pin number if no special GPIO-handler is | 760 | .gpio_set_direction() function. As described this shall be called from the |
669 | registered. | 761 | gpiolib driver and the affected GPIO range, pin offset and desired direction |
762 | will be passed along to this function. | ||
763 | |||
764 | Alternatively to using these special functions, it is fully allowed to use | ||
765 | named functions for each GPIO pin, the pinmux_request_gpio() will attempt to | ||
766 | obtain the function "gpioN" where "N" is the global GPIO pin number if no | ||
767 | special GPIO-handler is registered. | ||
670 | 768 | ||
671 | 769 | ||
672 | Pinmux board/machine configuration | 770 | Pinmux board/machine configuration |
@@ -683,19 +781,19 @@ spi on the second function mapping: | |||
683 | 781 | ||
684 | #include <linux/pinctrl/machine.h> | 782 | #include <linux/pinctrl/machine.h> |
685 | 783 | ||
686 | static struct pinmux_map pmx_mapping[] = { | 784 | static const struct pinmux_map __initdata pmx_mapping[] = { |
687 | { | 785 | { |
688 | .ctrl_dev_name = "pinctrl.0", | 786 | .ctrl_dev_name = "pinctrl-foo", |
689 | .function = "spi0", | 787 | .function = "spi0", |
690 | .dev_name = "foo-spi.0", | 788 | .dev_name = "foo-spi.0", |
691 | }, | 789 | }, |
692 | { | 790 | { |
693 | .ctrl_dev_name = "pinctrl.0", | 791 | .ctrl_dev_name = "pinctrl-foo", |
694 | .function = "i2c0", | 792 | .function = "i2c0", |
695 | .dev_name = "foo-i2c.0", | 793 | .dev_name = "foo-i2c.0", |
696 | }, | 794 | }, |
697 | { | 795 | { |
698 | .ctrl_dev_name = "pinctrl.0", | 796 | .ctrl_dev_name = "pinctrl-foo", |
699 | .function = "mmc0", | 797 | .function = "mmc0", |
700 | .dev_name = "foo-mmc.0", | 798 | .dev_name = "foo-mmc.0", |
701 | }, | 799 | }, |
@@ -714,14 +812,14 @@ for example if they are not yet instantiated or cumbersome to obtain. | |||
714 | 812 | ||
715 | You register this pinmux mapping to the pinmux subsystem by simply: | 813 | You register this pinmux mapping to the pinmux subsystem by simply: |
716 | 814 | ||
717 | ret = pinmux_register_mappings(&pmx_mapping, ARRAY_SIZE(pmx_mapping)); | 815 | ret = pinmux_register_mappings(pmx_mapping, ARRAY_SIZE(pmx_mapping)); |
718 | 816 | ||
719 | Since the above construct is pretty common there is a helper macro to make | 817 | Since the above construct is pretty common there is a helper macro to make |
720 | it even more compact which assumes you want to use pinctrl.0 and position | 818 | it even more compact which assumes you want to use pinctrl-foo and position |
721 | 0 for mapping, for example: | 819 | 0 for mapping, for example: |
722 | 820 | ||
723 | static struct pinmux_map pmx_mapping[] = { | 821 | static struct pinmux_map __initdata pmx_mapping[] = { |
724 | PINMUX_MAP_PRIMARY("I2CMAP", "i2c0", "foo-i2c.0"), | 822 | PINMUX_MAP("I2CMAP", "pinctrl-foo", "i2c0", "foo-i2c.0"), |
725 | }; | 823 | }; |
726 | 824 | ||
727 | 825 | ||
@@ -734,14 +832,14 @@ As it is possible to map a function to different groups of pins an optional | |||
734 | ... | 832 | ... |
735 | { | 833 | { |
736 | .name = "spi0-pos-A", | 834 | .name = "spi0-pos-A", |
737 | .ctrl_dev_name = "pinctrl.0", | 835 | .ctrl_dev_name = "pinctrl-foo", |
738 | .function = "spi0", | 836 | .function = "spi0", |
739 | .group = "spi0_0_grp", | 837 | .group = "spi0_0_grp", |
740 | .dev_name = "foo-spi.0", | 838 | .dev_name = "foo-spi.0", |
741 | }, | 839 | }, |
742 | { | 840 | { |
743 | .name = "spi0-pos-B", | 841 | .name = "spi0-pos-B", |
744 | .ctrl_dev_name = "pinctrl.0", | 842 | .ctrl_dev_name = "pinctrl-foo", |
745 | .function = "spi0", | 843 | .function = "spi0", |
746 | .group = "spi0_1_grp", | 844 | .group = "spi0_1_grp", |
747 | .dev_name = "foo-spi.0", | 845 | .dev_name = "foo-spi.0", |
@@ -760,44 +858,44 @@ case), we define a mapping like this: | |||
760 | ... | 858 | ... |
761 | { | 859 | { |
762 | .name "2bit" | 860 | .name "2bit" |
763 | .ctrl_dev_name = "pinctrl.0", | 861 | .ctrl_dev_name = "pinctrl-foo", |
764 | .function = "mmc0", | 862 | .function = "mmc0", |
765 | .group = "mmc0_0_grp", | 863 | .group = "mmc0_1_grp", |
766 | .dev_name = "foo-mmc.0", | 864 | .dev_name = "foo-mmc.0", |
767 | }, | 865 | }, |
768 | { | 866 | { |
769 | .name "4bit" | 867 | .name "4bit" |
770 | .ctrl_dev_name = "pinctrl.0", | 868 | .ctrl_dev_name = "pinctrl-foo", |
771 | .function = "mmc0", | 869 | .function = "mmc0", |
772 | .group = "mmc0_0_grp", | 870 | .group = "mmc0_1_grp", |
773 | .dev_name = "foo-mmc.0", | 871 | .dev_name = "foo-mmc.0", |
774 | }, | 872 | }, |
775 | { | 873 | { |
776 | .name "4bit" | 874 | .name "4bit" |
777 | .ctrl_dev_name = "pinctrl.0", | 875 | .ctrl_dev_name = "pinctrl-foo", |
778 | .function = "mmc0", | 876 | .function = "mmc0", |
779 | .group = "mmc0_1_grp", | 877 | .group = "mmc0_2_grp", |
780 | .dev_name = "foo-mmc.0", | 878 | .dev_name = "foo-mmc.0", |
781 | }, | 879 | }, |
782 | { | 880 | { |
783 | .name "8bit" | 881 | .name "8bit" |
784 | .ctrl_dev_name = "pinctrl.0", | 882 | .ctrl_dev_name = "pinctrl-foo", |
785 | .function = "mmc0", | 883 | .function = "mmc0", |
786 | .group = "mmc0_0_grp", | 884 | .group = "mmc0_1_grp", |
787 | .dev_name = "foo-mmc.0", | 885 | .dev_name = "foo-mmc.0", |
788 | }, | 886 | }, |
789 | { | 887 | { |
790 | .name "8bit" | 888 | .name "8bit" |
791 | .ctrl_dev_name = "pinctrl.0", | 889 | .ctrl_dev_name = "pinctrl-foo", |
792 | .function = "mmc0", | 890 | .function = "mmc0", |
793 | .group = "mmc0_1_grp", | 891 | .group = "mmc0_2_grp", |
794 | .dev_name = "foo-mmc.0", | 892 | .dev_name = "foo-mmc.0", |
795 | }, | 893 | }, |
796 | { | 894 | { |
797 | .name "8bit" | 895 | .name "8bit" |
798 | .ctrl_dev_name = "pinctrl.0", | 896 | .ctrl_dev_name = "pinctrl-foo", |
799 | .function = "mmc0", | 897 | .function = "mmc0", |
800 | .group = "mmc0_2_grp", | 898 | .group = "mmc0_3_grp", |
801 | .dev_name = "foo-mmc.0", | 899 | .dev_name = "foo-mmc.0", |
802 | }, | 900 | }, |
803 | ... | 901 | ... |
@@ -898,7 +996,7 @@ like this: | |||
898 | 996 | ||
899 | { | 997 | { |
900 | .name "POWERMAP" | 998 | .name "POWERMAP" |
901 | .ctrl_dev_name = "pinctrl.0", | 999 | .ctrl_dev_name = "pinctrl-foo", |
902 | .function = "power_func", | 1000 | .function = "power_func", |
903 | .hog_on_boot = true, | 1001 | .hog_on_boot = true, |
904 | }, | 1002 | }, |
diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt new file mode 100644 index 000000000000..fdcca991df30 --- /dev/null +++ b/Documentation/power/charger-manager.txt | |||
@@ -0,0 +1,163 @@ | |||
1 | Charger Manager | ||
2 | (C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL | ||
3 | |||
4 | Charger Manager provides in-kernel battery charger management that | ||
5 | requires temperature monitoring during suspend-to-RAM state | ||
6 | and where each battery may have multiple chargers attached and the userland | ||
7 | wants to look at the aggregated information of the multiple chargers. | ||
8 | |||
9 | Charger Manager is a platform_driver with power-supply-class entries. | ||
10 | An instance of Charger Manager (a platform-device created with Charger-Manager) | ||
11 | represents an independent battery with chargers. If there are multiple | ||
12 | batteries with their own chargers acting independently in a system, | ||
13 | the system may need multiple instances of Charger Manager. | ||
14 | |||
15 | 1. Introduction | ||
16 | =============== | ||
17 | |||
18 | Charger Manager supports the following: | ||
19 | |||
20 | * Support for multiple chargers (e.g., a device with USB, AC, and solar panels) | ||
21 | A system may have multiple chargers (or power sources) and some of | ||
22 | them may be activated at the same time. Each charger may have its | ||
23 | own power-supply-class and each power-supply-class can provide | ||
24 | different information about the battery status. This framework | ||
25 | aggregates charger-related information from multiple sources and | ||
26 | shows combined information as a single power-supply-class. | ||
27 | |||
28 | * Support for in suspend-to-RAM polling (with suspend_again callback) | ||
29 | While the battery is being charged and the system is in suspend-to-RAM, | ||
30 | we may need to monitor the battery health by looking at the ambient or | ||
31 | battery temperature. We can accomplish this by waking up the system | ||
32 | periodically. However, such a method wakes up devices unnecessary for | ||
33 | monitoring the battery health and tasks, and user processes that are | ||
34 | supposed to be kept suspended. That, in turn, incurs unnecessary power | ||
35 | consumption and slows down the charging process. Or even, such peak power | ||
36 | consumption can stop chargers in the middle of charging | ||
37 | (external power input < device power consumption), which not | ||
38 | only affects the charging time, but the lifespan of the battery. | ||
39 | |||
40 | Charger Manager provides a function "cm_suspend_again" that can be | ||
41 | used as suspend_again callback of platform_suspend_ops. If the platform | ||
42 | requires tasks other than cm_suspend_again, it may implement its own | ||
43 | suspend_again callback that calls cm_suspend_again in the middle. | ||
44 | Normally, the platform will need to resume and suspend some devices | ||
45 | that are used by Charger Manager. | ||
46 | |||
47 | 2. Global Charger-Manager Data related with suspend_again | ||
48 | ======================================================== | ||
49 | In order to setup Charger Manager with suspend-again feature | ||
50 | (in-suspend monitoring), the user should provide charger_global_desc | ||
51 | with setup_charger_manager(struct charger_global_desc *). | ||
52 | This charger_global_desc data for in-suspend monitoring is global | ||
53 | as the name suggests. Thus, the user needs to provide only once even | ||
54 | if there are multiple batteries. If there are multiple batteries, the | ||
55 | multiple instances of Charger Manager share the same charger_global_desc | ||
56 | and it will manage in-suspend monitoring for all instances of Charger Manager. | ||
57 | |||
58 | The user needs to provide both entries properly in order to activate | ||
59 | in-suspend monitoring: | ||
60 | |||
61 | struct charger_global_desc { | ||
62 | |||
63 | char *rtc_name; | ||
64 | : The name of rtc (e.g., "rtc0") used to wakeup the system from | ||
65 | suspend for Charger Manager. The alarm interrupt (AIE) of the rtc | ||
66 | should be able to wake up the system from suspend. Charger Manager | ||
67 | saves and restores the alarm value and uses the previously-defined | ||
68 | alarm if it is going to go off earlier than Charger Manager so that | ||
69 | Charger Manager does not interfere with previously-defined alarms. | ||
70 | |||
71 | bool (*rtc_only_wakeup)(void); | ||
72 | : This callback should let CM know whether | ||
73 | the wakeup-from-suspend is caused only by the alarm of "rtc" in the | ||
74 | same struct. If any other wakeup source triggered the | ||
75 | wakeup, it should return false. If the "rtc" is the only wakeup | ||
76 | reason, it should return true. | ||
77 | }; | ||
78 | |||
79 | 3. How to setup suspend_again | ||
80 | ============================= | ||
81 | Charger Manager provides a function "extern bool cm_suspend_again(void)". | ||
82 | When cm_suspend_again is called, it monitors every battery. The suspend_ops | ||
83 | callback of the system's platform_suspend_ops can call cm_suspend_again | ||
84 | function to know whether Charger Manager wants to suspend again or not. | ||
85 | If there are no other devices or tasks that want to use suspend_again | ||
86 | feature, the platform_suspend_ops may directly refer to cm_suspend_again | ||
87 | for its suspend_again callback. | ||
88 | |||
89 | The cm_suspend_again() returns true (meaning "I want to suspend again") | ||
90 | if the system was woken up by Charger Manager and the polling | ||
91 | (in-suspend monitoring) results in "normal". | ||
92 | |||
93 | 4. Charger-Manager Data (struct charger_desc) | ||
94 | ============================================= | ||
95 | For each battery charged independently from other batteries (if a series of | ||
96 | batteries are charged by a single charger, they are counted as one independent | ||
97 | battery), an instance of Charger Manager is attached to it. | ||
98 | |||
99 | struct charger_desc { | ||
100 | |||
101 | char *psy_name; | ||
102 | : The power-supply-class name of the battery. Default is | ||
103 | "battery" if psy_name is NULL. Users can access the psy entries | ||
104 | at "/sys/class/power_supply/[psy_name]/". | ||
105 | |||
106 | enum polling_modes polling_mode; | ||
107 | : CM_POLL_DISABLE: do not poll this battery. | ||
108 | CM_POLL_ALWAYS: always poll this battery. | ||
109 | CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if | ||
110 | an external power source is attached. | ||
111 | CM_POLL_CHARGING_ONLY: poll this battery if and only if the | ||
112 | battery is being charged. | ||
113 | |||
114 | unsigned int fullbatt_uV; | ||
115 | : If specified with a non-zero value, Charger Manager assumes | ||
116 | that the battery is full (capacity = 100) if the battery is not being | ||
117 | charged and the battery voltage is equal to or greater than | ||
118 | fullbatt_uV. | ||
119 | |||
120 | unsigned int polling_interval_ms; | ||
121 | : Required polling interval in ms. Charger Manager will poll | ||
122 | this battery every polling_interval_ms or more frequently. | ||
123 | |||
124 | enum data_source battery_present; | ||
125 | CM_FUEL_GAUGE: get battery presence information from fuel gauge. | ||
126 | CM_CHARGER_STAT: get battery presence from chargers. | ||
127 | |||
128 | char **psy_charger_stat; | ||
129 | : An array ending with NULL that has power-supply-class names of | ||
130 | chargers. Each power-supply-class should provide "PRESENT" (if | ||
131 | battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an | ||
132 | external power source is attached or not), and "STATUS" (shows whether | ||
133 | the battery is {"FULL" or not FULL} or {"FULL", "Charging", | ||
134 | "Discharging", "NotCharging"}). | ||
135 | |||
136 | int num_charger_regulators; | ||
137 | struct regulator_bulk_data *charger_regulators; | ||
138 | : Regulators representing the chargers in the form for | ||
139 | regulator framework's bulk functions. | ||
140 | |||
141 | char *psy_fuel_gauge; | ||
142 | : Power-supply-class name of the fuel gauge. | ||
143 | |||
144 | int (*temperature_out_of_range)(int *mC); | ||
145 | bool measure_battery_temp; | ||
146 | : This callback returns 0 if the temperature is safe for charging, | ||
147 | a positive number if it is too hot to charge, and a negative number | ||
148 | if it is too cold to charge. With the variable mC, the callback returns | ||
149 | the temperature in 1/1000 of centigrade. | ||
150 | The source of temperature can be battery or ambient one according to | ||
151 | the value of measure_battery_temp. | ||
152 | }; | ||
153 | |||
154 | 5. Other Considerations | ||
155 | ======================= | ||
156 | |||
157 | At the charger/battery-related events such as battery-pulled-out, | ||
158 | charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped, | ||
159 | and others critical to chargers, the system should be configured to wake up. | ||
160 | At least the following should wake up the system from a suspend: | ||
161 | a) charger-on/off b) external-power-in/out c) battery-in/out (while charging) | ||
162 | |||
163 | It is usually accomplished by configuring the PMIC as a wakeup source. | ||
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 3139fb505dce..20af7def23c8 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -126,7 +126,9 @@ The core methods to suspend and resume devices reside in struct dev_pm_ops | |||
126 | pointed to by the ops member of struct dev_pm_domain, or by the pm member of | 126 | pointed to by the ops member of struct dev_pm_domain, or by the pm member of |
127 | struct bus_type, struct device_type and struct class. They are mostly of | 127 | struct bus_type, struct device_type and struct class. They are mostly of |
128 | interest to the people writing infrastructure for platforms and buses, like PCI | 128 | interest to the people writing infrastructure for platforms and buses, like PCI |
129 | or USB, or device type and device class drivers. | 129 | or USB, or device type and device class drivers. They also are relevant to the |
130 | writers of device drivers whose subsystems (PM domains, device types, device | ||
131 | classes and bus types) don't provide all power management methods. | ||
130 | 132 | ||
131 | Bus drivers implement these methods as appropriate for the hardware and the | 133 | Bus drivers implement these methods as appropriate for the hardware and the |
132 | drivers using it; PCI works differently from USB, and so on. Not many people | 134 | drivers using it; PCI works differently from USB, and so on. Not many people |
@@ -268,32 +270,35 @@ various phases always run after tasks have been frozen and before they are | |||
268 | unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have | 270 | unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have |
269 | been disabled (except for those marked with the IRQF_NO_SUSPEND flag). | 271 | been disabled (except for those marked with the IRQF_NO_SUSPEND flag). |
270 | 272 | ||
271 | All phases use PM domain, bus, type, or class callbacks (that is, methods | 273 | All phases use PM domain, bus, type, class or driver callbacks (that is, methods |
272 | defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, or dev->class->pm). | 274 | defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or |
273 | These callbacks are regarded by the PM core as mutually exclusive. Moreover, | 275 | dev->driver->pm). These callbacks are regarded by the PM core as mutually |
274 | PM domain callbacks always take precedence over bus, type and class callbacks, | 276 | exclusive. Moreover, PM domain callbacks always take precedence over all of the |
275 | while type callbacks take precedence over bus and class callbacks, and class | 277 | other callbacks and, for example, type callbacks take precedence over bus, class |
276 | callbacks take precedence over bus callbacks. To be precise, the following | 278 | and driver callbacks. To be precise, the following rules are used to determine |
277 | rules are used to determine which callback to execute in the given phase: | 279 | which callback to execute in the given phase: |
278 | 280 | ||
279 | 1. If dev->pm_domain is present, the PM core will attempt to execute the | 281 | 1. If dev->pm_domain is present, the PM core will choose the callback |
280 | callback included in dev->pm_domain->ops. If that callback is not | 282 | included in dev->pm_domain->ops for execution. |
281 | present, no action will be carried out for the given device. | ||
282 | 283 | ||
283 | 2. Otherwise, if both dev->type and dev->type->pm are present, the callback | 284 | 2. Otherwise, if both dev->type and dev->type->pm are present, the callback |
284 | included in dev->type->pm will be executed. | 285 | included in dev->type->pm will be chosen for execution. |
285 | 286 | ||
286 | 3. Otherwise, if both dev->class and dev->class->pm are present, the | 287 | 3. Otherwise, if both dev->class and dev->class->pm are present, the |
287 | callback included in dev->class->pm will be executed. | 288 | callback included in dev->class->pm will be chosen for execution. |
288 | 289 | ||
289 | 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback | 290 | 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback |
290 | included in dev->bus->pm will be executed. | 291 | included in dev->bus->pm will be chosen for execution. |
291 | 292 | ||
292 | This allows PM domains and device types to override callbacks provided by bus | 293 | This allows PM domains and device types to override callbacks provided by bus |
293 | types or device classes if necessary. | 294 | types or device classes if necessary. |
294 | 295 | ||
295 | These callbacks may in turn invoke device- or driver-specific methods stored in | 296 | The PM domain, type, class and bus callbacks may in turn invoke device- or |
296 | dev->driver->pm, but they don't have to. | 297 | driver-specific methods stored in dev->driver->pm, but they don't have to do |
298 | that. | ||
299 | |||
300 | If the subsystem callback chosen for execution is not present, the PM core will | ||
301 | execute the corresponding method from dev->driver->pm instead if there is one. | ||
297 | 302 | ||
298 | 303 | ||
299 | Entering System Suspend | 304 | Entering System Suspend |
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 316c2ba187f4..6ccb68f68da6 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt | |||
@@ -21,7 +21,7 @@ freeze_processes() (defined in kernel/power/process.c) is called. It executes | |||
21 | try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and | 21 | try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and |
22 | either wakes them up, if they are kernel threads, or sends fake signals to them, | 22 | either wakes them up, if they are kernel threads, or sends fake signals to them, |
23 | if they are user space processes. A task that has TIF_FREEZE set, should react | 23 | if they are user space processes. A task that has TIF_FREEZE set, should react |
24 | to it by calling the function called refrigerator() (defined in | 24 | to it by calling the function called __refrigerator() (defined in |
25 | kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state | 25 | kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state |
26 | to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it. | 26 | to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it. |
27 | Then, we say that the task is 'frozen' and therefore the set of functions | 27 | Then, we say that the task is 'frozen' and therefore the set of functions |
@@ -29,10 +29,10 @@ handling this mechanism is referred to as 'the freezer' (these functions are | |||
29 | defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). | 29 | defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). |
30 | User space processes are generally frozen before kernel threads. | 30 | User space processes are generally frozen before kernel threads. |
31 | 31 | ||
32 | It is not recommended to call refrigerator() directly. Instead, it is | 32 | __refrigerator() must not be called directly. Instead, use the |
33 | recommended to use the try_to_freeze() function (defined in | 33 | try_to_freeze() function (defined in include/linux/freezer.h), that checks |
34 | include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the | 34 | the task's TIF_FREEZE flag and makes the task enter __refrigerator() if the |
35 | task enter refrigerator() if the flag is set. | 35 | flag is set. |
36 | 36 | ||
37 | For user space processes try_to_freeze() is called automatically from the | 37 | For user space processes try_to_freeze() is called automatically from the |
38 | signal-handling code, but the freezable kernel threads need to call it | 38 | signal-handling code, but the freezable kernel threads need to call it |
@@ -61,13 +61,13 @@ wait_event_freezable() and wait_event_freezable_timeout() macros. | |||
61 | After the system memory state has been restored from a hibernation image and | 61 | After the system memory state has been restored from a hibernation image and |
62 | devices have been reinitialized, the function thaw_processes() is called in | 62 | devices have been reinitialized, the function thaw_processes() is called in |
63 | order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that | 63 | order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that |
64 | have been frozen leave refrigerator() and continue running. | 64 | have been frozen leave __refrigerator() and continue running. |
65 | 65 | ||
66 | III. Which kernel threads are freezable? | 66 | III. Which kernel threads are freezable? |
67 | 67 | ||
68 | Kernel threads are not freezable by default. However, a kernel thread may clear | 68 | Kernel threads are not freezable by default. However, a kernel thread may clear |
69 | PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE | 69 | PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE |
70 | directly is strongly discouraged). From this point it is regarded as freezable | 70 | directly is not allowed). From this point it is regarded as freezable |
71 | and must call try_to_freeze() in a suitable place. | 71 | and must call try_to_freeze() in a suitable place. |
72 | 72 | ||
73 | IV. Why do we do that? | 73 | IV. Why do we do that? |
@@ -176,3 +176,28 @@ tasks, since it generally exists anyway. | |||
176 | A driver must have all firmwares it may need in RAM before suspend() is called. | 176 | A driver must have all firmwares it may need in RAM before suspend() is called. |
177 | If keeping them is not practical, for example due to their size, they must be | 177 | If keeping them is not practical, for example due to their size, they must be |
178 | requested early enough using the suspend notifier API described in notifiers.txt. | 178 | requested early enough using the suspend notifier API described in notifiers.txt. |
179 | |||
180 | VI. Are there any precautions to be taken to prevent freezing failures? | ||
181 | |||
182 | Yes, there are. | ||
183 | |||
184 | First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code | ||
185 | from system-wide sleep such as suspend/hibernation is not encouraged. | ||
186 | If possible, that piece of code must instead hook onto the suspend/hibernation | ||
187 | notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code | ||
188 | (kernel/cpu.c) for an example. | ||
189 | |||
190 | However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary, | ||
191 | it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since | ||
192 | that could lead to freezing failures, because if the suspend/hibernate code | ||
193 | successfully acquired the 'pm_mutex' lock, and hence that other entity failed | ||
194 | to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE | ||
195 | state. As a consequence, the freezer would not be able to freeze that task, | ||
196 | leading to freezing failure. | ||
197 | |||
198 | However, the [un]lock_system_sleep() APIs are safe to use in this scenario, | ||
199 | since they ask the freezer to skip freezing this task, since it is anyway | ||
200 | "frozen enough" as it is blocked on 'pm_mutex', which will be released | ||
201 | only after the entire suspend/hibernation sequence is complete. | ||
202 | So, to summarize, use [un]lock_system_sleep() instead of directly using | ||
203 | mutex_[un]lock(&pm_mutex). That would prevent freezing failures. | ||
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt index 3f8b528f237e..e272d9909e39 100644 --- a/Documentation/power/regulator/regulator.txt +++ b/Documentation/power/regulator/regulator.txt | |||
@@ -12,7 +12,7 @@ Drivers can register a regulator by calling :- | |||
12 | 12 | ||
13 | struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, | 13 | struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, |
14 | struct device *dev, struct regulator_init_data *init_data, | 14 | struct device *dev, struct regulator_init_data *init_data, |
15 | void *driver_data); | 15 | void *driver_data, struct device_node *of_node); |
16 | 16 | ||
17 | This will register the regulator's capabilities and operations to the regulator | 17 | This will register the regulator's capabilities and operations to the regulator |
18 | core. | 18 | core. |
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index c2ae8bf77d46..4abe83e1045a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt | |||
@@ -57,6 +57,10 @@ the following: | |||
57 | 57 | ||
58 | 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. | 58 | 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. |
59 | 59 | ||
60 | If the subsystem chosen by applying the above rules doesn't provide the relevant | ||
61 | callback, the PM core will invoke the corresponding driver callback stored in | ||
62 | dev->driver->pm directly (if present). | ||
63 | |||
60 | The PM core always checks which callback to use in the order given above, so the | 64 | The PM core always checks which callback to use in the order given above, so the |
61 | priority order of callbacks from high to low is: PM domain, device type, class | 65 | priority order of callbacks from high to low is: PM domain, device type, class |
62 | and bus type. Moreover, the high-priority one will always take precedence over | 66 | and bus type. Moreover, the high-priority one will always take precedence over |
@@ -64,86 +68,88 @@ a low-priority one. The PM domain, bus type, device type and class callbacks | |||
64 | are referred to as subsystem-level callbacks in what follows. | 68 | are referred to as subsystem-level callbacks in what follows. |
65 | 69 | ||
66 | By default, the callbacks are always invoked in process context with interrupts | 70 | By default, the callbacks are always invoked in process context with interrupts |
67 | enabled. However, subsystems can use the pm_runtime_irq_safe() helper function | 71 | enabled. However, the pm_runtime_irq_safe() helper function can be used to tell |
68 | to tell the PM core that their ->runtime_suspend(), ->runtime_resume() and | 72 | the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume() |
69 | ->runtime_idle() callbacks may be invoked in atomic context with interrupts | 73 | and ->runtime_idle() callbacks for the given device in atomic context with |
70 | disabled for a given device. This implies that the callback routines in | 74 | interrupts disabled. This implies that the callback routines in question must |
71 | question must not block or sleep, but it also means that the synchronous helper | 75 | not block or sleep, but it also means that the synchronous helper functions |
72 | functions listed at the end of Section 4 may be used for that device within an | 76 | listed at the end of Section 4 may be used for that device within an interrupt |
73 | interrupt handler or generally in an atomic context. | 77 | handler or generally in an atomic context. |
74 | 78 | ||
75 | The subsystem-level suspend callback is _entirely_ _responsible_ for handling | 79 | The subsystem-level suspend callback, if present, is _entirely_ _responsible_ |
76 | the suspend of the device as appropriate, which may, but need not include | 80 | for handling the suspend of the device as appropriate, which may, but need not |
77 | executing the device driver's own ->runtime_suspend() callback (from the | 81 | include executing the device driver's own ->runtime_suspend() callback (from the |
78 | PM core's point of view it is not necessary to implement a ->runtime_suspend() | 82 | PM core's point of view it is not necessary to implement a ->runtime_suspend() |
79 | callback in a device driver as long as the subsystem-level suspend callback | 83 | callback in a device driver as long as the subsystem-level suspend callback |
80 | knows what to do to handle the device). | 84 | knows what to do to handle the device). |
81 | 85 | ||
82 | * Once the subsystem-level suspend callback has completed successfully | 86 | * Once the subsystem-level suspend callback (or the driver suspend callback, |
83 | for given device, the PM core regards the device as suspended, which need | 87 | if invoked directly) has completed successfully for the given device, the PM |
84 | not mean that the device has been put into a low power state. It is | 88 | core regards the device as suspended, which need not mean that it has been |
85 | supposed to mean, however, that the device will not process data and will | 89 | put into a low power state. It is supposed to mean, however, that the |
86 | not communicate with the CPU(s) and RAM until the subsystem-level resume | 90 | device will not process data and will not communicate with the CPU(s) and |
87 | callback is executed for it. The runtime PM status of a device after | 91 | RAM until the appropriate resume callback is executed for it. The runtime |
88 | successful execution of the subsystem-level suspend callback is 'suspended'. | 92 | PM status of a device after successful execution of the suspend callback is |
89 | 93 | 'suspended'. | |
90 | * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, | 94 | |
91 | the device's runtime PM status is 'active', which means that the device | 95 | * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM |
92 | _must_ be fully operational afterwards. | 96 | status remains 'active', which means that the device _must_ be fully |
93 | 97 | operational afterwards. | |
94 | * If the subsystem-level suspend callback returns an error code different | 98 | |
95 | from -EBUSY or -EAGAIN, the PM core regards this as a fatal error and will | 99 | * If the suspend callback returns an error code different from -EBUSY and |
96 | refuse to run the helper functions described in Section 4 for the device, | 100 | -EAGAIN, the PM core regards this as a fatal error and will refuse to run |
97 | until the status of it is directly set either to 'active', or to 'suspended' | 101 | the helper functions described in Section 4 for the device until its status |
98 | (the PM core provides special helper functions for this purpose). | 102 | is directly set to either 'active', or 'suspended' (the PM core provides |
99 | 103 | special helper functions for this purpose). | |
100 | In particular, if the driver requires remote wake-up capability (i.e. hardware | 104 | |
105 | In particular, if the driver requires remote wakeup capability (i.e. hardware | ||
101 | mechanism allowing the device to request a change of its power state, such as | 106 | mechanism allowing the device to request a change of its power state, such as |
102 | PCI PME) for proper functioning and device_run_wake() returns 'false' for the | 107 | PCI PME) for proper functioning and device_run_wake() returns 'false' for the |
103 | device, then ->runtime_suspend() should return -EBUSY. On the other hand, if | 108 | device, then ->runtime_suspend() should return -EBUSY. On the other hand, if |
104 | device_run_wake() returns 'true' for the device and the device is put into a low | 109 | device_run_wake() returns 'true' for the device and the device is put into a |
105 | power state during the execution of the subsystem-level suspend callback, it is | 110 | low-power state during the execution of the suspend callback, it is expected |
106 | expected that remote wake-up will be enabled for the device. Generally, remote | 111 | that remote wakeup will be enabled for the device. Generally, remote wakeup |
107 | wake-up should be enabled for all input devices put into a low power state at | 112 | should be enabled for all input devices put into low-power states at run time. |
108 | run time. | 113 | |
109 | 114 | The subsystem-level resume callback, if present, is _entirely_ _responsible_ for | |
110 | The subsystem-level resume callback is _entirely_ _responsible_ for handling the | 115 | handling the resume of the device as appropriate, which may, but need not |
111 | resume of the device as appropriate, which may, but need not include executing | 116 | include executing the device driver's own ->runtime_resume() callback (from the |
112 | the device driver's own ->runtime_resume() callback (from the PM core's point of | 117 | PM core's point of view it is not necessary to implement a ->runtime_resume() |
113 | view it is not necessary to implement a ->runtime_resume() callback in a device | 118 | callback in a device driver as long as the subsystem-level resume callback knows |
114 | driver as long as the subsystem-level resume callback knows what to do to handle | 119 | what to do to handle the device). |
115 | the device). | 120 | |
116 | 121 | * Once the subsystem-level resume callback (or the driver resume callback, if | |
117 | * Once the subsystem-level resume callback has completed successfully, the PM | 122 | invoked directly) has completed successfully, the PM core regards the device |
118 | core regards the device as fully operational, which means that the device | 123 | as fully operational, which means that the device _must_ be able to complete |
119 | _must_ be able to complete I/O operations as needed. The runtime PM status | 124 | I/O operations as needed. The runtime PM status of the device is then |
120 | of the device is then 'active'. | 125 | 'active'. |
121 | 126 | ||
122 | * If the subsystem-level resume callback returns an error code, the PM core | 127 | * If the resume callback returns an error code, the PM core regards this as a |
123 | regards this as a fatal error and will refuse to run the helper functions | 128 | fatal error and will refuse to run the helper functions described in Section |
124 | described in Section 4 for the device, until its status is directly set | 129 | 4 for the device, until its status is directly set to either 'active', or |
125 | either to 'active' or to 'suspended' (the PM core provides special helper | 130 | 'suspended' (by means of special helper functions provided by the PM core |
126 | functions for this purpose). | 131 | for this purpose). |
127 | 132 | ||
128 | The subsystem-level idle callback is executed by the PM core whenever the device | 133 | The idle callback (a subsystem-level one, if present, or the driver one) is |
129 | appears to be idle, which is indicated to the PM core by two counters, the | 134 | executed by the PM core whenever the device appears to be idle, which is |
130 | device's usage counter and the counter of 'active' children of the device. | 135 | indicated to the PM core by two counters, the device's usage counter and the |
136 | counter of 'active' children of the device. | ||
131 | 137 | ||
132 | * If any of these counters is decreased using a helper function provided by | 138 | * If any of these counters is decreased using a helper function provided by |
133 | the PM core and it turns out to be equal to zero, the other counter is | 139 | the PM core and it turns out to be equal to zero, the other counter is |
134 | checked. If that counter also is equal to zero, the PM core executes the | 140 | checked. If that counter also is equal to zero, the PM core executes the |
135 | subsystem-level idle callback with the device as an argument. | 141 | idle callback with the device as its argument. |
136 | 142 | ||
137 | The action performed by a subsystem-level idle callback is totally dependent on | 143 | The action performed by the idle callback is totally dependent on the subsystem |
138 | the subsystem in question, but the expected and recommended action is to check | 144 | (or driver) in question, but the expected and recommended action is to check |
139 | if the device can be suspended (i.e. if all of the conditions necessary for | 145 | if the device can be suspended (i.e. if all of the conditions necessary for |
140 | suspending the device are satisfied) and to queue up a suspend request for the | 146 | suspending the device are satisfied) and to queue up a suspend request for the |
141 | device in that case. The value returned by this callback is ignored by the PM | 147 | device in that case. The value returned by this callback is ignored by the PM |
142 | core. | 148 | core. |
143 | 149 | ||
144 | The helper functions provided by the PM core, described in Section 4, guarantee | 150 | The helper functions provided by the PM core, described in Section 4, guarantee |
145 | that the following constraints are met with respect to the bus type's runtime | 151 | that the following constraints are met with respect to runtime PM callbacks for |
146 | PM callbacks: | 152 | one device: |
147 | 153 | ||
148 | (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute | 154 | (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute |
149 | ->runtime_suspend() in parallel with ->runtime_resume() or with another | 155 | ->runtime_suspend() in parallel with ->runtime_resume() or with another |
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index efe998becc5b..462321c1aeea 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt | |||
@@ -41,7 +41,6 @@ ldd | |||
41 | Debugging modules | 41 | Debugging modules |
42 | The proc file system | 42 | The proc file system |
43 | Starting points for debugging scripting languages etc. | 43 | Starting points for debugging scripting languages etc. |
44 | Dumptool & Lcrash | ||
45 | SysRq | 44 | SysRq |
46 | References | 45 | References |
47 | Special Thanks | 46 | Special Thanks |
@@ -2455,39 +2454,6 @@ jdb <filename> another fully interactive gdb style debugger. | |||
2455 | 2454 | ||
2456 | 2455 | ||
2457 | 2456 | ||
2458 | Dumptool & Lcrash ( lkcd ) | ||
2459 | ========================== | ||
2460 | Michael Holzheu & others here at IBM have a fairly mature port of | ||
2461 | SGI's lcrash tool which allows one to look at kernel structures in a | ||
2462 | running kernel. | ||
2463 | |||
2464 | It also complements a tool called dumptool which dumps all the kernel's | ||
2465 | memory pages & registers to either a tape or a disk. | ||
2466 | This can be used by tech support or an ambitious end user do | ||
2467 | post mortem debugging of a machine like gdb core dumps. | ||
2468 | |||
2469 | Going into how to use this tool in detail will be explained | ||
2470 | in other documentation supplied by IBM with the patches & the | ||
2471 | lcrash homepage http://oss.sgi.com/projects/lkcd/ & the lcrash manpage. | ||
2472 | |||
2473 | How they work | ||
2474 | ------------- | ||
2475 | Lcrash is a perfectly normal program,however, it requires 2 | ||
2476 | additional files, Kerntypes which is built using a patch to the | ||
2477 | linux kernel sources in the linux root directory & the System.map. | ||
2478 | |||
2479 | Kerntypes is an objectfile whose sole purpose in life | ||
2480 | is to provide stabs debug info to lcrash, to do this | ||
2481 | Kerntypes is built from kerntypes.c which just includes the most commonly | ||
2482 | referenced header files used when debugging, lcrash can then read the | ||
2483 | .stabs section of this file. | ||
2484 | |||
2485 | Debugging a live system it uses /dev/mem | ||
2486 | alternatively for post mortem debugging it uses the data | ||
2487 | collected by dumptool. | ||
2488 | |||
2489 | |||
2490 | |||
2491 | SysRq | 2457 | SysRq |
2492 | ===== | 2458 | ===== |
2493 | This is now supported by linux for s/390 & z/Architecture. | 2459 | This is now supported by linux for s/390 & z/Architecture. |
diff --git a/Documentation/scsi/53c700.txt b/Documentation/scsi/53c700.txt index 0da681d497a2..e31aceb6df15 100644 --- a/Documentation/scsi/53c700.txt +++ b/Documentation/scsi/53c700.txt | |||
@@ -16,32 +16,13 @@ fill in to get the driver working. | |||
16 | Compile Time Flags | 16 | Compile Time Flags |
17 | ================== | 17 | ================== |
18 | 18 | ||
19 | The driver may be either io mapped or memory mapped. This is | 19 | A compile time flag is: |
20 | selectable by configuration flags: | ||
21 | |||
22 | CONFIG_53C700_MEM_MAPPED | ||
23 | |||
24 | define if the driver is memory mapped. | ||
25 | |||
26 | CONFIG_53C700_IO_MAPPED | ||
27 | |||
28 | define if the driver is to be io mapped. | ||
29 | |||
30 | One or other of the above flags *must* be defined. | ||
31 | |||
32 | Other flags are: | ||
33 | 20 | ||
34 | CONFIG_53C700_LE_ON_BE | 21 | CONFIG_53C700_LE_ON_BE |
35 | 22 | ||
36 | define if the chipset must be supported in little endian mode on a big | 23 | define if the chipset must be supported in little endian mode on a big |
37 | endian architecture (used for the 700 on parisc). | 24 | endian architecture (used for the 700 on parisc). |
38 | 25 | ||
39 | CONFIG_53C700_USE_CONSISTENT | ||
40 | |||
41 | allocate consistent memory (should only be used if your architecture | ||
42 | has a mixture of consistent and inconsistent memory). Fully | ||
43 | consistent or fully inconsistent architectures should not define this. | ||
44 | |||
45 | 26 | ||
46 | Using the Chip Core Driver | 27 | Using the Chip Core Driver |
47 | ========================== | 28 | ========================== |
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX index 19bc49439cac..99b85d39751c 100644 --- a/Documentation/security/00-INDEX +++ b/Documentation/security/00-INDEX | |||
@@ -1,5 +1,7 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file. | 2 | - this file. |
3 | LSM.txt | ||
4 | - description of the Linux Security Module framework. | ||
3 | SELinux.txt | 5 | SELinux.txt |
4 | - how to get started with the SELinux security enhancement. | 6 | - how to get started with the SELinux security enhancement. |
5 | Smack.txt | 7 | Smack.txt |
diff --git a/Documentation/security/LSM.txt b/Documentation/security/LSM.txt new file mode 100644 index 000000000000..c335a763a2ed --- /dev/null +++ b/Documentation/security/LSM.txt | |||
@@ -0,0 +1,34 @@ | |||
1 | Linux Security Module framework | ||
2 | ------------------------------- | ||
3 | |||
4 | The Linux Security Module (LSM) framework provides a mechanism for | ||
5 | various security checks to be hooked by new kernel extensions. The name | ||
6 | "module" is a bit of a misnomer since these extensions are not actually | ||
7 | loadable kernel modules. Instead, they are selectable at build-time via | ||
8 | CONFIG_DEFAULT_SECURITY and can be overridden at boot-time via the | ||
9 | "security=..." kernel command line argument, in the case where multiple | ||
10 | LSMs were built into a given kernel. | ||
11 | |||
12 | The primary users of the LSM interface are Mandatory Access Control | ||
13 | (MAC) extensions which provide a comprehensive security policy. Examples | ||
14 | include SELinux, Smack, Tomoyo, and AppArmor. In addition to the larger | ||
15 | MAC extensions, other extensions can be built using the LSM to provide | ||
16 | specific changes to system operation when these tweaks are not available | ||
17 | in the core functionality of Linux itself. | ||
18 | |||
19 | Without a specific LSM built into the kernel, the default LSM will be the | ||
20 | Linux capabilities system. Most LSMs choose to extend the capabilities | ||
21 | system, building their checks on top of the defined capability hooks. | ||
22 | For more details on capabilities, see capabilities(7) in the Linux | ||
23 | man-pages project. | ||
24 | |||
25 | Based on http://kerneltrap.org/Linux/Documenting_Security_Module_Intent, | ||
26 | a new LSM is accepted into the kernel when its intent (a description of | ||
27 | what it tries to protect against and in what cases one would expect to | ||
28 | use it) has been appropriately documented in Documentation/security/. | ||
29 | This allows an LSM's code to be easily compared to its goals, so | ||
30 | that end users and distros can make a more informed decision about which | ||
31 | LSMs suit their requirements. | ||
32 | |||
33 | For extensive documentation on the available LSM hook interfaces, please | ||
34 | see include/linux/security.h. | ||
diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt index fc0366cbd7ce..86257052e31a 100644 --- a/Documentation/security/credentials.txt +++ b/Documentation/security/credentials.txt | |||
@@ -221,10 +221,10 @@ The Linux kernel supports the following types of credentials: | |||
221 | (5) LSM | 221 | (5) LSM |
222 | 222 | ||
223 | The Linux Security Module allows extra controls to be placed over the | 223 | The Linux Security Module allows extra controls to be placed over the |
224 | operations that a task may do. Currently Linux supports two main | 224 | operations that a task may do. Currently Linux supports several LSM |
225 | alternate LSM options: SELinux and Smack. | 225 | options. |
226 | 226 | ||
227 | Both work by labelling the objects in a system and then applying sets of | 227 | Some work by labelling the objects in a system and then applying sets of |
228 | rules (policies) that say what operations a task with one label may do to | 228 | rules (policies) that say what operations a task with one label may do to |
229 | an object with another label. | 229 | an object with another label. |
230 | 230 | ||
diff --git a/Documentation/serial/driver b/Documentation/serial/driver index 77ba0afbe4db..0a25a9191864 100644 --- a/Documentation/serial/driver +++ b/Documentation/serial/driver | |||
@@ -101,7 +101,7 @@ hardware. | |||
101 | Returns the current state of modem control inputs. The state | 101 | Returns the current state of modem control inputs. The state |
102 | of the outputs should not be returned, since the core keeps | 102 | of the outputs should not be returned, since the core keeps |
103 | track of their state. The state information should include: | 103 | track of their state. The state information should include: |
104 | - TIOCM_DCD state of DCD signal | 104 | - TIOCM_CAR state of DCD signal |
105 | - TIOCM_CTS state of CTS signal | 105 | - TIOCM_CTS state of CTS signal |
106 | - TIOCM_DSR state of DSR signal | 106 | - TIOCM_DSR state of DSR signal |
107 | - TIOCM_RI state of RI signal | 107 | - TIOCM_RI state of RI signal |
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index edad99abec21..c8c54544abc5 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt | |||
@@ -42,19 +42,7 @@ ALC260 | |||
42 | 42 | ||
43 | ALC262 | 43 | ALC262 |
44 | ====== | 44 | ====== |
45 | fujitsu Fujitsu Laptop | 45 | N/A |
46 | benq Benq ED8 | ||
47 | benq-t31 Benq T31 | ||
48 | hippo Hippo (ATI) with jack detection, Sony UX-90s | ||
49 | hippo_1 Hippo (Benq) with jack detection | ||
50 | toshiba-s06 Toshiba S06 | ||
51 | toshiba-rx1 Toshiba RX1 | ||
52 | tyan Tyan Thunder n6650W (S2915-E) | ||
53 | ultra Samsung Q1 Ultra Vista model | ||
54 | lenovo-3000 Lenovo 3000 y410 | ||
55 | nec NEC Versa S9100 | ||
56 | basic fixed pin assignment w/o SPDIF | ||
57 | auto auto-config reading BIOS (default) | ||
58 | 46 | ||
59 | ALC267/268 | 47 | ALC267/268 |
60 | ========== | 48 | ========== |
@@ -350,7 +338,6 @@ STAC92HD83* | |||
350 | mic-ref Reference board with power management for ports | 338 | mic-ref Reference board with power management for ports |
351 | dell-s14 Dell laptop | 339 | dell-s14 Dell laptop |
352 | dell-vostro-3500 Dell Vostro 3500 laptop | 340 | dell-vostro-3500 Dell Vostro 3500 laptop |
353 | hp HP laptops with (inverted) mute-LED | ||
354 | hp-dv7-4000 HP dv-7 4000 | 341 | hp-dv7-4000 HP dv-7 4000 |
355 | auto BIOS setup (default) | 342 | auto BIOS setup (default) |
356 | 343 | ||
diff --git a/Documentation/sound/alsa/compress_offload.txt b/Documentation/sound/alsa/compress_offload.txt new file mode 100644 index 000000000000..c83a835350f0 --- /dev/null +++ b/Documentation/sound/alsa/compress_offload.txt | |||
@@ -0,0 +1,188 @@ | |||
1 | compress_offload.txt | ||
2 | ===================== | ||
3 | Pierre-Louis.Bossart <pierre-louis.bossart@linux.intel.com> | ||
4 | Vinod Koul <vinod.koul@linux.intel.com> | ||
5 | |||
6 | Overview | ||
7 | |||
8 | Since its early days, the ALSA API was defined with PCM support or | ||
9 | constant bitrates payloads such as IEC61937 in mind. Arguments and | ||
10 | returned values in frames are the norm, making it a challenge to | ||
11 | extend the existing API to compressed data streams. | ||
12 | |||
13 | In recent years, audio digital signal processors (DSP) were integrated | ||
14 | in system-on-chip designs, and DSPs are also integrated in audio | ||
15 | codecs. Processing compressed data on such DSPs results in a dramatic | ||
16 | reduction of power consumption compared to host-based | ||
17 | processing. Support for such hardware has not been very good in Linux, | ||
18 | mostly because of a lack of a generic API available in the mainline | ||
19 | kernel. | ||
20 | |||
21 | Rather than requiring a compatibility break with an API change of the | ||
22 | ALSA PCM interface, a new 'Compressed Data' API is introduced to | ||
23 | provide a control and data-streaming interface for audio DSPs. | ||
24 | |||
25 | The design of this API was inspired by the 2-year experience with the | ||
26 | Intel Moorestown SOC, with many corrections required to upstream the | ||
27 | API in the mainline kernel instead of the staging tree and make it | ||
28 | usable by others. | ||
29 | |||
30 | Requirements | ||
31 | |||
32 | The main requirements are: | ||
33 | |||
34 | - separation between byte counts and time. Compressed formats may have | ||
35 | a header per file, per frame, or no header at all. The payload size | ||
36 | may vary from frame-to-frame. As a result, it is not possible to | ||
37 | estimate reliably the duration of audio buffers when handling | ||
38 | compressed data. Dedicated mechanisms are required to allow for | ||
39 | reliable audio-video synchronization, which requires precise | ||
40 | reporting of the number of samples rendered at any given time. | ||
41 | |||
42 | - Handling of multiple formats. PCM data only requires a specification | ||
43 | of the sampling rate, number of channels and bits per sample. In | ||
44 | contrast, compressed data comes in a variety of formats. Audio DSPs | ||
45 | may also provide support for a limited number of audio encoders and | ||
46 | decoders embedded in firmware, or may support more choices through | ||
47 | dynamic download of libraries. | ||
48 | |||
49 | - Focus on main formats. This API provides support for the most | ||
50 | popular formats used for audio and video capture and playback. It is | ||
51 | likely that as audio compression technology advances, new formats | ||
52 | will be added. | ||
53 | |||
54 | - Handling of multiple configurations. Even for a given format like | ||
55 | AAC, some implementations may support AAC multichannel but HE-AAC | ||
56 | stereo. Likewise WMA10 level M3 may require too much memory and cpu | ||
57 | cycles. The new API needs to provide a generic way of listing these | ||
58 | formats. | ||
59 | |||
60 | - Rendering/Grabbing only. This API does not provide any means of | ||
61 | hardware acceleration, where PCM samples are provided back to | ||
62 | user-space for additional processing. This API focuses instead on | ||
63 | streaming compressed data to a DSP, with the assumption that the | ||
64 | decoded samples are routed to a physical output or logical back-end. | ||
65 | |||
66 | - Complexity hiding. Existing user-space multimedia frameworks all | ||
67 | have existing enums/structures for each compressed format. This new | ||
68 | API assumes the existence of a platform-specific compatibility layer | ||
69 | to expose, translate and make use of the capabilities of the audio | ||
70 | DSP, eg. Android HAL or PulseAudio sinks. By construction, regular | ||
71 | applications are not supposed to make use of this API. | ||
72 | |||
73 | |||
74 | Design | ||
75 | |||
76 | The new API shares a number of concepts with the PCM API for flow | ||
77 | control. Start, pause, resume, drain and stop commands have the same | ||
78 | semantics no matter what the content is. | ||
79 | |||
80 | The concept of memory ring buffer divided in a set of fragments is | ||
81 | borrowed from the ALSA PCM API. However, only sizes in bytes can be | ||
82 | specified. | ||
83 | |||
84 | Seeks/trick modes are assumed to be handled by the host. | ||
85 | |||
86 | The notion of rewinds/forwards is not supported. Data committed to the | ||
87 | ring buffer cannot be invalidated, except when dropping all buffers. | ||
88 | |||
89 | The Compressed Data API does not make any assumptions on how the data | ||
90 | is transmitted to the audio DSP. DMA transfers from main memory to an | ||
91 | embedded audio cluster or to a SPI interface for external DSPs are | ||
92 | possible. As in the ALSA PCM case, a core set of routines is exposed; | ||
93 | each driver implementer will have to write support for a set of | ||
94 | mandatory routines and possibly make use of optional ones. | ||
95 | |||
96 | The main additions are | ||
97 | |||
98 | - get_caps | ||
99 | This routine returns the list of audio formats supported. Querying the | ||
100 | codecs on a capture stream will return encoders; decoders will be | ||
101 | listed for playback streams. | ||
102 | |||
103 | - get_codec_caps For each codec, this routine returns a list of | ||
104 | capabilities. The intent is to make sure all the capabilities | ||
105 | correspond to valid settings, and to minimize the risks of | ||
106 | configuration failures. For example, for a complex codec such as AAC, | ||
107 | the number of channels supported may depend on a specific profile. If | ||
108 | the capabilities were exposed with a single descriptor, it may happen | ||
109 | that a specific combination of profiles/channels/formats may not be | ||
110 | supported. Likewise, embedded DSPs have limited memory and cpu cycles, | ||
111 | it is likely that some implementations make the list of capabilities | ||
112 | dynamic and dependent on existing workloads. In addition to codec | ||
113 | settings, this routine returns the minimum buffer size handled by the | ||
114 | implementation. This information can be a function of the DMA buffer | ||
115 | sizes, the number of bytes required to synchronize, etc, and can be | ||
116 | used by userspace to define how much needs to be written in the ring | ||
117 | buffer before playback can start. | ||
118 | |||
119 | - set_params | ||
120 | This routine sets the configuration chosen for a specific codec. The | ||
121 | most important field in the parameters is the codec type; in most | ||
122 | cases decoders will ignore other fields, while encoders will strictly | ||
123 | comply with the settings. | ||
124 | |||
125 | - get_params | ||
126 | This routine returns the actual settings used by the DSP. Changes to | ||
127 | the settings should remain the exception. | ||
128 | |||
129 | - get_timestamp | ||
130 | The timestamp becomes a multiple field structure. It lists the number | ||
131 | of bytes transferred, the number of samples processed and the number | ||
132 | of samples rendered/grabbed. All these values can be used to determine | ||
133 | the average bitrate, figure out if the ring buffer needs to be | ||
134 | refilled or the delay due to decoding/encoding/io on the DSP. | ||
135 | |||
136 | Note that the list of codecs/profiles/modes was derived from the | ||
137 | OpenMAX AL specification instead of reinventing the wheel. | ||
138 | Modifications include: | ||
139 | - Addition of FLAC and IEC formats | ||
140 | - Merge of encoder/decoder capabilities | ||
141 | - Profiles/modes listed as bitmasks to make descriptors more compact | ||
142 | - Addition of set_params for decoders (missing in OpenMAX AL) | ||
143 | - Addition of AMR/AMR-WB encoding modes (missing in OpenMAX AL) | ||
144 | - Addition of format information for WMA | ||
145 | - Addition of encoding options when required (derived from OpenMAX IL) | ||
146 | - Addition of rateControlSupported (missing in OpenMAX AL) | ||
147 | |||
148 | Not supported: | ||
149 | |||
150 | - Support for VoIP/circuit-switched calls is not the target of this | ||
151 | API. Support for dynamic bit-rate changes would require a tight | ||
152 | coupling between the DSP and the host stack, limiting power savings. | ||
153 | |||
154 | - Packet-loss concealment is not supported. This would require an | ||
155 | additional interface to let the decoder synthesize data when frames | ||
156 | are lost during transmission. This may be added in the future. | ||
157 | |||
158 | - Volume control/routing is not handled by this API. Devices exposing a | ||
159 | compressed data interface will be considered as regular ALSA devices; | ||
160 | volume changes and routing information will be provided with regular | ||
161 | ALSA kcontrols. | ||
162 | |||
163 | - Embedded audio effects. Such effects should be enabled in the same | ||
164 | manner, no matter if the input was PCM or compressed. | ||
165 | |||
166 | - multichannel IEC encoding. Unclear if this is required. | ||
167 | |||
168 | - Encoding/decoding acceleration is not supported as mentioned | ||
169 | above. It is possible to route the output of a decoder to a capture | ||
170 | stream, or even implement transcoding capabilities. This routing | ||
171 | would be enabled with ALSA kcontrols. | ||
172 | |||
173 | - Audio policy/resource management. This API does not provide any | ||
174 | hooks to query the utilization of the audio DSP, nor any preemption | ||
175 | mechanisms. | ||
176 | |||
177 | - No notion of underrun/overrun. Since the bytes written are compressed | ||
178 | in nature and data written/read doesn't translate directly to | ||
179 | rendered output in time, this does not deal with underrun/overrun, which | ||
180 | may be dealt with in a user library. | ||
181 | |||
182 | Credits: | ||
183 | - Mark Brown and Liam Girdwood for discussions on the need for this API | ||
184 | - Harsha Priya for her work on intel_sst compressed API | ||
185 | - Rakesh Ughreja for valuable feedback | ||
186 | - Sing Nallasellan, Sikkandar Madar and Prasanna Samaga for | ||
187 | demonstrating and quantifying the benefits of audio offload on a | ||
188 | real platform. | ||
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 1f2463671a1a..8c20fbd8b42d 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -49,6 +49,7 @@ show up in /proc/sys/kernel: | |||
49 | - panic | 49 | - panic |
50 | - panic_on_oops | 50 | - panic_on_oops |
51 | - panic_on_unrecovered_nmi | 51 | - panic_on_unrecovered_nmi |
52 | - panic_on_stackoverflow | ||
52 | - pid_max | 53 | - pid_max |
53 | - powersave-nap [ PPC only ] | 54 | - powersave-nap [ PPC only ] |
54 | - printk | 55 | - printk |
@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered. | |||
393 | 394 | ||
394 | ============================================================== | 395 | ============================================================== |
395 | 396 | ||
397 | panic_on_stackoverflow: | ||
398 | |||
399 | Controls the kernel's behavior when an overflow of the kernel, IRQ | ||
400 | or exception stacks (but not a user stack) is detected. | ||
401 | This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. | ||
402 | |||
403 | 0: try to continue operation. | ||
404 | |||
405 | 1: panic immediately. | ||
406 | |||
407 | ============================================================== | ||
408 | |||
409 | |||
396 | pid_max: | 410 | pid_max: |
397 | 411 | ||
398 | PID allocation wrap value. When the kernel's next PID value | 412 | PID allocation wrap value. When the kernel's next PID value |
@@ -401,6 +415,14 @@ PIDs of value pid_max or larger are not allocated. | |||
401 | 415 | ||
402 | ============================================================== | 416 | ============================================================== |
403 | 417 | ||
418 | ns_last_pid: | ||
419 | |||
420 | The last pid allocated in the current pid namespace (the one in which | ||
421 | the task using this sysctl lives). When selecting a pid for the next task | ||
422 | on fork, the kernel tries to allocate a number starting from this one. | ||
423 | |||
424 | ============================================================== | ||
425 | |||
404 | powersave-nap: (PPC only) | 426 | powersave-nap: (PPC only) |
405 | 427 | ||
406 | If set, Linux-PPC will use the 'nap' mode of powersaving, | 428 | If set, Linux-PPC will use the 'nap' mode of powersaving, |
diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.txt index aa82ee4a5a87..194800410061 100644 --- a/Documentation/trace/events-kmem.txt +++ b/Documentation/trace/events-kmem.txt | |||
@@ -40,8 +40,8 @@ but the call_site can usually be used to extrapolate that information. | |||
40 | ================== | 40 | ================== |
41 | mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s | 41 | mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s |
42 | mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d | 42 | mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d |
43 | mm_page_free_direct page=%p pfn=%lu order=%d | 43 | mm_page_free page=%p pfn=%lu order=%d |
44 | mm_pagevec_free page=%p pfn=%lu order=%d cold=%d | 44 | mm_page_free_batched page=%p pfn=%lu order=%d cold=%d |
45 | 45 | ||
46 | These four events deal with page allocation and freeing. mm_page_alloc is | 46 | These four events deal with page allocation and freeing. mm_page_alloc is |
47 | a simple indicator of page allocator activity. Pages may be allocated from | 47 | a simple indicator of page allocator activity. Pages may be allocated from |
@@ -53,13 +53,13 @@ amounts of activity imply high activity on the zone->lock. Taking this lock | |||
53 | impairs performance by disabling interrupts, dirtying cache lines between | 53 | impairs performance by disabling interrupts, dirtying cache lines between |
54 | CPUs and serialising many CPUs. | 54 | CPUs and serialising many CPUs. |
55 | 55 | ||
56 | When a page is freed directly by the caller, the mm_page_free_direct event | 56 | When a page is freed directly by the caller, only the mm_page_free event |
57 | is triggered. Significant amounts of activity here could indicate that the | 57 | is triggered. Significant amounts of activity here could indicate that the |
58 | callers should be batching their activities. | 58 | callers should be batching their activities. |
59 | 59 | ||
60 | When pages are freed using a pagevec, the mm_pagevec_free is | 60 | When pages are freed in batch, the mm_page_free_batched event is also triggered. |
61 | triggered. Broadly speaking, pages are taken off the LRU lock in bulk and | 61 | Broadly speaking, pages are taken off the LRU lock in bulk and |
62 | freed in batch with a pagevec. Significant amounts of activity here could | 62 | freed in batch with a page list. Significant amounts of activity here could |
63 | indicate that the system is under memory pressure and can also indicate | 63 | indicate that the system is under memory pressure and can also indicate |
64 | contention on the zone->lru_lock. | 64 | contention on the zone->lru_lock. |
65 | 65 | ||
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index b510564aac7e..bb24c2a0e870 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt | |||
@@ -191,8 +191,6 @@ And for string fields they are: | |||
191 | 191 | ||
192 | Currently, only exact string matches are supported. | 192 | Currently, only exact string matches are supported. |
193 | 193 | ||
194 | Currently, the maximum number of predicates in a filter is 16. | ||
195 | |||
196 | 5.2 Setting filters | 194 | 5.2 Setting filters |
197 | ------------------- | 195 | ------------------- |
198 | 196 | ||
diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl index 7df50e8cf4d9..0a120aae33ce 100644 --- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl +++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl | |||
@@ -17,8 +17,8 @@ use Getopt::Long; | |||
17 | 17 | ||
18 | # Tracepoint events | 18 | # Tracepoint events |
19 | use constant MM_PAGE_ALLOC => 1; | 19 | use constant MM_PAGE_ALLOC => 1; |
20 | use constant MM_PAGE_FREE_DIRECT => 2; | 20 | use constant MM_PAGE_FREE => 2; |
21 | use constant MM_PAGEVEC_FREE => 3; | 21 | use constant MM_PAGE_FREE_BATCHED => 3; |
22 | use constant MM_PAGE_PCPU_DRAIN => 4; | 22 | use constant MM_PAGE_PCPU_DRAIN => 4; |
23 | use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; | 23 | use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; |
24 | use constant MM_PAGE_ALLOC_EXTFRAG => 6; | 24 | use constant MM_PAGE_ALLOC_EXTFRAG => 6; |
@@ -223,10 +223,10 @@ EVENT_PROCESS: | |||
223 | # Perl Switch() sucks majorly | 223 | # Perl Switch() sucks majorly |
224 | if ($tracepoint eq "mm_page_alloc") { | 224 | if ($tracepoint eq "mm_page_alloc") { |
225 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; | 225 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; |
226 | } elsif ($tracepoint eq "mm_page_free_direct") { | 226 | } elsif ($tracepoint eq "mm_page_free") { |
227 | $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++; | 227 | $perprocesspid{$process_pid}->{MM_PAGE_FREE}++ |
228 | } elsif ($tracepoint eq "mm_pagevec_free") { | 228 | } elsif ($tracepoint eq "mm_page_free_batched") { |
229 | $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++; | 229 | $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++; |
230 | } elsif ($tracepoint eq "mm_page_pcpu_drain") { | 230 | } elsif ($tracepoint eq "mm_page_pcpu_drain") { |
231 | $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; | 231 | $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; |
232 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; | 232 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; |
@@ -336,8 +336,8 @@ sub dump_stats { | |||
336 | $process_pid, | 336 | $process_pid, |
337 | $stats{$process_pid}->{MM_PAGE_ALLOC}, | 337 | $stats{$process_pid}->{MM_PAGE_ALLOC}, |
338 | $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, | 338 | $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, |
339 | $stats{$process_pid}->{MM_PAGE_FREE_DIRECT}, | 339 | $stats{$process_pid}->{MM_PAGE_FREE}, |
340 | $stats{$process_pid}->{MM_PAGEVEC_FREE}, | 340 | $stats{$process_pid}->{MM_PAGE_FREE_BATCHED}, |
341 | $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, | 341 | $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, |
342 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}, | 342 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}, |
343 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}, | 343 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}, |
@@ -364,8 +364,8 @@ sub aggregate_perprocesspid() { | |||
364 | 364 | ||
365 | $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; | 365 | $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; |
366 | $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; | 366 | $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; |
367 | $perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}; | 367 | $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE}; |
368 | $perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}; | 368 | $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}; |
369 | $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; | 369 | $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; |
370 | $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; | 370 | $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; |
371 | $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; | 371 | $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; |
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt index 87bee3c129ba..058cc6c9dc56 100644 --- a/Documentation/trace/tracepoint-analysis.txt +++ b/Documentation/trace/tracepoint-analysis.txt | |||
@@ -93,14 +93,14 @@ By specifying the -a switch and analysing sleep, the system-wide events | |||
93 | for a duration of time can be examined. | 93 | for a duration of time can be examined. |
94 | 94 | ||
95 | $ perf stat -a \ | 95 | $ perf stat -a \ |
96 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 96 | -e kmem:mm_page_alloc -e kmem:mm_page_free \ |
97 | -e kmem:mm_pagevec_free \ | 97 | -e kmem:mm_page_free_batched \ |
98 | sleep 10 | 98 | sleep 10 |
99 | Performance counter stats for 'sleep 10': | 99 | Performance counter stats for 'sleep 10': |
100 | 100 | ||
101 | 9630 kmem:mm_page_alloc | 101 | 9630 kmem:mm_page_alloc |
102 | 2143 kmem:mm_page_free_direct | 102 | 2143 kmem:mm_page_free |
103 | 7424 kmem:mm_pagevec_free | 103 | 7424 kmem:mm_page_free_batched |
104 | 104 | ||
105 | 10.002577764 seconds time elapsed | 105 | 10.002577764 seconds time elapsed |
106 | 106 | ||
@@ -119,15 +119,15 @@ basis using set_ftrace_pid. | |||
119 | Events can be activated and tracked for the duration of a process on a local | 119 | Events can be activated and tracked for the duration of a process on a local |
120 | basis using PCL such as follows. | 120 | basis using PCL such as follows. |
121 | 121 | ||
122 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 122 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ |
123 | -e kmem:mm_pagevec_free ./hackbench 10 | 123 | -e kmem:mm_page_free_batched ./hackbench 10 |
124 | Time: 0.909 | 124 | Time: 0.909 |
125 | 125 | ||
126 | Performance counter stats for './hackbench 10': | 126 | Performance counter stats for './hackbench 10': |
127 | 127 | ||
128 | 17803 kmem:mm_page_alloc | 128 | 17803 kmem:mm_page_alloc |
129 | 12398 kmem:mm_page_free_direct | 129 | 12398 kmem:mm_page_free |
130 | 4827 kmem:mm_pagevec_free | 130 | 4827 kmem:mm_page_free_batched |
131 | 131 | ||
132 | 0.973913387 seconds time elapsed | 132 | 0.973913387 seconds time elapsed |
133 | 133 | ||
@@ -146,8 +146,8 @@ to know what the standard deviation is. By and large, this is left to the | |||
146 | performance analyst to do it by hand. In the event that the discrete event | 146 | performance analyst to do it by hand. In the event that the discrete event |
147 | occurrences are useful to the performance analyst, then perf can be used. | 147 | occurrences are useful to the performance analyst, then perf can be used. |
148 | 148 | ||
149 | $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free_direct | 149 | $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free |
150 | -e kmem:mm_pagevec_free ./hackbench 10 | 150 | -e kmem:mm_page_free_batched ./hackbench 10 |
151 | Time: 0.890 | 151 | Time: 0.890 |
152 | Time: 0.895 | 152 | Time: 0.895 |
153 | Time: 0.915 | 153 | Time: 0.915 |
@@ -157,8 +157,8 @@ occurrences are useful to the performance analyst, then perf can be used. | |||
157 | Performance counter stats for './hackbench 10' (5 runs): | 157 | Performance counter stats for './hackbench 10' (5 runs): |
158 | 158 | ||
159 | 16630 kmem:mm_page_alloc ( +- 3.542% ) | 159 | 16630 kmem:mm_page_alloc ( +- 3.542% ) |
160 | 11486 kmem:mm_page_free_direct ( +- 4.771% ) | 160 | 11486 kmem:mm_page_free ( +- 4.771% ) |
161 | 4730 kmem:mm_pagevec_free ( +- 2.325% ) | 161 | 4730 kmem:mm_page_free_batched ( +- 2.325% ) |
162 | 162 | ||
163 | 0.982653002 seconds time elapsed ( +- 1.448% ) | 163 | 0.982653002 seconds time elapsed ( +- 1.448% ) |
164 | 164 | ||
@@ -168,15 +168,15 @@ aggregation of discrete events, then a script would need to be developed. | |||
168 | Using --repeat, it is also possible to view how events are fluctuating over | 168 | Using --repeat, it is also possible to view how events are fluctuating over |
169 | time on a system-wide basis using -a and sleep. | 169 | time on a system-wide basis using -a and sleep. |
170 | 170 | ||
171 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 171 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \ |
172 | -e kmem:mm_pagevec_free \ | 172 | -e kmem:mm_page_free_batched \ |
173 | -a --repeat 10 \ | 173 | -a --repeat 10 \ |
174 | sleep 1 | 174 | sleep 1 |
175 | Performance counter stats for 'sleep 1' (10 runs): | 175 | Performance counter stats for 'sleep 1' (10 runs): |
176 | 176 | ||
177 | 1066 kmem:mm_page_alloc ( +- 26.148% ) | 177 | 1066 kmem:mm_page_alloc ( +- 26.148% ) |
178 | 182 kmem:mm_page_free_direct ( +- 5.464% ) | 178 | 182 kmem:mm_page_free ( +- 5.464% ) |
179 | 890 kmem:mm_pagevec_free ( +- 30.079% ) | 179 | 890 kmem:mm_page_free_batched ( +- 30.079% ) |
180 | 180 | ||
181 | 1.002251757 seconds time elapsed ( +- 0.005% ) | 181 | 1.002251757 seconds time elapsed ( +- 0.005% ) |
182 | 182 | ||
@@ -220,8 +220,8 @@ were generating events within the kernel. To begin this sort of analysis, the | |||
220 | data must be recorded. At the time of writing, this required root: | 220 | data must be recorded. At the time of writing, this required root: |
221 | 221 | ||
222 | $ perf record -c 1 \ | 222 | $ perf record -c 1 \ |
223 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 223 | -e kmem:mm_page_alloc -e kmem:mm_page_free \ |
224 | -e kmem:mm_pagevec_free \ | 224 | -e kmem:mm_page_free_batched \ |
225 | ./hackbench 10 | 225 | ./hackbench 10 |
226 | Time: 0.894 | 226 | Time: 0.894 |
227 | [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ] | 227 | [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ] |
@@ -260,8 +260,8 @@ noticed that X was generating an insane amount of page allocations so let's look | |||
260 | at it: | 260 | at it: |
261 | 261 | ||
262 | $ perf record -c 1 -f \ | 262 | $ perf record -c 1 -f \ |
263 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 263 | -e kmem:mm_page_alloc -e kmem:mm_page_free \ |
264 | -e kmem:mm_pagevec_free \ | 264 | -e kmem:mm_page_free_batched \ |
265 | -p `pidof X` | 265 | -p `pidof X` |
266 | 266 | ||
267 | This was interrupted after a few seconds and | 267 | This was interrupted after a few seconds and |
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt index a4efa0462f05..5335fa8b06eb 100644 --- a/Documentation/usb/usbmon.txt +++ b/Documentation/usb/usbmon.txt | |||
@@ -47,10 +47,11 @@ This allows to filter away annoying devices that talk continuously. | |||
47 | 47 | ||
48 | 2. Find which bus connects to the desired device | 48 | 2. Find which bus connects to the desired device |
49 | 49 | ||
50 | Run "cat /proc/bus/usb/devices", and find the T-line which corresponds to | 50 | Run "cat /sys/kernel/debug/usb/devices", and find the T-line which corresponds |
51 | the device. Usually you do it by looking for the vendor string. If you have | 51 | to the device. Usually you do it by looking for the vendor string. If you have |
52 | many similar devices, unplug one and compare two /proc/bus/usb/devices outputs. | 52 | many similar devices, unplug one and compare the two |
53 | The T-line will have a bus number. Example: | 53 | /sys/kernel/debug/usb/devices outputs. The T-line will have a bus number. |
54 | Example: | ||
54 | 55 | ||
55 | T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 | 56 | T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 |
56 | D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 | 57 | D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 |
@@ -58,7 +59,10 @@ P: Vendor=0557 ProdID=2004 Rev= 1.00 | |||
58 | S: Manufacturer=ATEN | 59 | S: Manufacturer=ATEN |
59 | S: Product=UC100KM V2.00 | 60 | S: Product=UC100KM V2.00 |
60 | 61 | ||
61 | Bus=03 means it's bus 3. | 62 | "Bus=03" means it's bus 3. Alternatively, you can look at the output from |
63 | "lsusb" and get the bus number from the appropriate line. Example: | ||
64 | |||
65 | Bus 003 Device 002: ID 0557:2004 ATEN UC100KM V2.00 | ||
62 | 66 | ||
63 | 3. Start 'cat' | 67 | 3. Start 'cat' |
64 | 68 | ||
diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt index b7d401e0eae9..014423e2824c 100644 --- a/Documentation/vgaarbiter.txt +++ b/Documentation/vgaarbiter.txt | |||
@@ -177,7 +177,7 @@ II. Credits | |||
177 | 177 | ||
178 | Benjamin Herrenschmidt (IBM?) started this work when he discussed such design | 178 | Benjamin Herrenschmidt (IBM?) started this work when he discussed such design |
179 | with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and | 179 | with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and |
180 | Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work | 180 | Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work |
181 | enhancing the kernel code to adapt as a kernel module and also did the | 181 | enhancing the kernel code to adapt as a kernel module and also did the |
182 | implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave | 182 | implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave |
183 | Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree. | 183 | Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree. |
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e2a4b5287361..e1d94bf4056e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -1466,6 +1466,31 @@ is supported; 2 if the processor requires all virtual machines to have | |||
1466 | an RMA, or 1 if the processor can use an RMA but doesn't require it, | 1466 | an RMA, or 1 if the processor can use an RMA but doesn't require it, |
1467 | because it supports the Virtual RMA (VRMA) facility. | 1467 | because it supports the Virtual RMA (VRMA) facility. |
1468 | 1468 | ||
1469 | 4.64 KVM_NMI | ||
1470 | |||
1471 | Capability: KVM_CAP_USER_NMI | ||
1472 | Architectures: x86 | ||
1473 | Type: vcpu ioctl | ||
1474 | Parameters: none | ||
1475 | Returns: 0 on success, -1 on error | ||
1476 | |||
1477 | Queues an NMI on the thread's vcpu. Note this is well defined only | ||
1478 | when KVM_CREATE_IRQCHIP has not been called, since this is an interface | ||
1479 | between the virtual cpu core and virtual local APIC. After KVM_CREATE_IRQCHIP | ||
1480 | has been called, this interface is completely emulated within the kernel. | ||
1481 | |||
1482 | To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the | ||
1483 | following algorithm: | ||
1484 | |||
1485 | - pause the vcpu | |
1486 | - read the local APIC's state (KVM_GET_LAPIC) | ||
1487 | - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1) | ||
1488 | - if so, issue KVM_NMI | ||
1489 | - resume the vcpu | ||
1490 | |||
1491 | Some guests configure the LINT1 NMI input to cause a panic, aiding in | ||
1492 | debugging. | ||
1493 | |||
1469 | 5. The kvm_run structure | 1494 | 5. The kvm_run structure |
1470 | 1495 | ||
1471 | Application code obtains a pointer to the kvm_run structure by | 1496 | Application code obtains a pointer to the kvm_run structure by |
diff --git a/Documentation/virtual/lguest/.gitignore b/Documentation/virtual/lguest/.gitignore deleted file mode 100644 index 115587fd5f65..000000000000 --- a/Documentation/virtual/lguest/.gitignore +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | lguest | ||
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile deleted file mode 100644 index 0ac34206f7a7..000000000000 --- a/Documentation/virtual/lguest/Makefile +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # This creates the demonstration utility "lguest" which runs a Linux guest. | ||
2 | # Missing headers? Add "-I../../../include -I../../../arch/x86/include" | ||
3 | CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE | ||
4 | |||
5 | all: lguest | ||
6 | |||
7 | clean: | ||
8 | rm -f lguest | ||
diff --git a/Documentation/virtual/lguest/extract b/Documentation/virtual/lguest/extract deleted file mode 100644 index 7730bb6e4b94..000000000000 --- a/Documentation/virtual/lguest/extract +++ /dev/null | |||
@@ -1,58 +0,0 @@ | |||
1 | #! /bin/sh | ||
2 | |||
3 | set -e | ||
4 | |||
5 | PREFIX=$1 | ||
6 | shift | ||
7 | |||
8 | trap 'rm -r $TMPDIR' 0 | ||
9 | TMPDIR=`mktemp -d` | ||
10 | |||
11 | exec 3>/dev/null | ||
12 | for f; do | ||
13 | while IFS=" | ||
14 | " read -r LINE; do | ||
15 | case "$LINE" in | ||
16 | *$PREFIX:[0-9]*:\**) | ||
17 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
18 | if [ -f $TMPDIR/$NUM ]; then | ||
19 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
20 | exit 1 | ||
21 | fi | ||
22 | exec 3>>$TMPDIR/$NUM | ||
23 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
24 | /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3 | ||
25 | ;; | ||
26 | *$PREFIX:[0-9]*) | ||
27 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
28 | if [ -f $TMPDIR/$NUM ]; then | ||
29 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
30 | exit 1 | ||
31 | fi | ||
32 | exec 3>>$TMPDIR/$NUM | ||
33 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
34 | /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3 | ||
35 | ;; | ||
36 | *:\**) | ||
37 | /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3 | ||
38 | echo >&3 | ||
39 | exec 3>/dev/null | ||
40 | ;; | ||
41 | *) | ||
42 | /bin/echo "$LINE" >&3 | ||
43 | ;; | ||
44 | esac | ||
45 | done < $f | ||
46 | echo >&3 | ||
47 | exec 3>/dev/null | ||
48 | done | ||
49 | |||
50 | LASTFILE="" | ||
51 | for f in $TMPDIR/*; do | ||
52 | if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then | ||
53 | LASTFILE=$(cat $TMPDIR/.$(basename $f) ) | ||
54 | echo "[ $LASTFILE ]" | ||
55 | fi | ||
56 | cat $f | ||
57 | done | ||
58 | |||
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c deleted file mode 100644 index c095d79cae73..000000000000 --- a/Documentation/virtual/lguest/lguest.c +++ /dev/null | |||
@@ -1,2065 +0,0 @@ | |||
1 | /*P:100 | ||
2 | * This is the Launcher code, a simple program which lays out the "physical" | ||
3 | * memory for the new Guest by mapping the kernel image and the virtual | ||
4 | * devices, then opens /dev/lguest to tell the kernel about the Guest and | ||
5 | * control it. | ||
6 | :*/ | ||
7 | #define _LARGEFILE64_SOURCE | ||
8 | #define _GNU_SOURCE | ||
9 | #include <stdio.h> | ||
10 | #include <string.h> | ||
11 | #include <unistd.h> | ||
12 | #include <err.h> | ||
13 | #include <stdint.h> | ||
14 | #include <stdlib.h> | ||
15 | #include <elf.h> | ||
16 | #include <sys/mman.h> | ||
17 | #include <sys/param.h> | ||
18 | #include <sys/types.h> | ||
19 | #include <sys/stat.h> | ||
20 | #include <sys/wait.h> | ||
21 | #include <sys/eventfd.h> | ||
22 | #include <fcntl.h> | ||
23 | #include <stdbool.h> | ||
24 | #include <errno.h> | ||
25 | #include <ctype.h> | ||
26 | #include <sys/socket.h> | ||
27 | #include <sys/ioctl.h> | ||
28 | #include <sys/time.h> | ||
29 | #include <time.h> | ||
30 | #include <netinet/in.h> | ||
31 | #include <net/if.h> | ||
32 | #include <linux/sockios.h> | ||
33 | #include <linux/if_tun.h> | ||
34 | #include <sys/uio.h> | ||
35 | #include <termios.h> | ||
36 | #include <getopt.h> | ||
37 | #include <assert.h> | ||
38 | #include <sched.h> | ||
39 | #include <limits.h> | ||
40 | #include <stddef.h> | ||
41 | #include <signal.h> | ||
42 | #include <pwd.h> | ||
43 | #include <grp.h> | ||
44 | |||
45 | #include <linux/virtio_config.h> | ||
46 | #include <linux/virtio_net.h> | ||
47 | #include <linux/virtio_blk.h> | ||
48 | #include <linux/virtio_console.h> | ||
49 | #include <linux/virtio_rng.h> | ||
50 | #include <linux/virtio_ring.h> | ||
51 | #include <asm/bootparam.h> | ||
52 | #include "../../../include/linux/lguest_launcher.h" | ||
53 | /*L:110 | ||
54 | * We can ignore the 43 include files we need for this program, but I do want | ||
55 | * to draw attention to the use of kernel-style types. | ||
56 | * | ||
57 | * As Linus said, "C is a Spartan language, and so should your naming be." I | ||
58 | * like these abbreviations, so we define them here. Note that u64 is always | ||
59 | * unsigned long long, which works on all Linux systems: this means that we can | ||
60 | * use %llu in printf for any u64. | ||
61 | */ | ||
62 | typedef unsigned long long u64; | ||
63 | typedef uint32_t u32; | ||
64 | typedef uint16_t u16; | ||
65 | typedef uint8_t u8; | ||
66 | /*:*/ | ||
67 | |||
68 | #define BRIDGE_PFX "bridge:" | ||
69 | #ifndef SIOCBRADDIF | ||
70 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ | ||
71 | #endif | ||
72 | /* We can have up to 256 pages for devices. */ | ||
73 | #define DEVICE_PAGES 256 | ||
74 | /* This will occupy 3 pages: it must be a power of 2. */ | ||
75 | #define VIRTQUEUE_NUM 256 | ||
76 | |||
77 | /*L:120 | ||
78 | * verbose is both a global flag and a macro. The C preprocessor allows | ||
79 | * this, and although I wouldn't recommend it, it works quite nicely here. | ||
80 | */ | ||
81 | static bool verbose; | ||
82 | #define verbose(args...) \ | ||
83 | do { if (verbose) printf(args); } while(0) | ||
84 | /*:*/ | ||
85 | |||
86 | /* The pointer to the start of guest memory. */ | ||
87 | static void *guest_base; | ||
88 | /* The maximum guest physical address allowed, and maximum possible. */ | ||
89 | static unsigned long guest_limit, guest_max; | ||
90 | /* The /dev/lguest file descriptor. */ | ||
91 | static int lguest_fd; | ||
92 | |||
93 | /* a per-cpu variable indicating whose vcpu is currently running */ | ||
94 | static unsigned int __thread cpu_id; | ||
95 | |||
96 | /* This is our list of devices. */ | ||
97 | struct device_list { | ||
98 | /* Counter to assign interrupt numbers. */ | ||
99 | unsigned int next_irq; | ||
100 | |||
101 | /* Counter to print out convenient device numbers. */ | ||
102 | unsigned int device_num; | ||
103 | |||
104 | /* The descriptor page for the devices. */ | ||
105 | u8 *descpage; | ||
106 | |||
107 | /* A single linked list of devices. */ | ||
108 | struct device *dev; | ||
109 | /* And a pointer to the last device for easy append. */ | ||
110 | struct device *lastdev; | ||
111 | }; | ||
112 | |||
113 | /* The list of Guest devices, based on command line arguments. */ | ||
114 | static struct device_list devices; | ||
115 | |||
116 | /* The device structure describes a single device. */ | ||
117 | struct device { | ||
118 | /* The linked-list pointer. */ | ||
119 | struct device *next; | ||
120 | |||
121 | /* The device's descriptor, as mapped into the Guest. */ | ||
122 | struct lguest_device_desc *desc; | ||
123 | |||
124 | /* We can't trust desc values once Guest has booted: we use these. */ | ||
125 | unsigned int feature_len; | ||
126 | unsigned int num_vq; | ||
127 | |||
128 | /* The name of this device, for --verbose. */ | ||
129 | const char *name; | ||
130 | |||
131 | /* Any queues attached to this device */ | ||
132 | struct virtqueue *vq; | ||
133 | |||
134 | /* Is it operational */ | ||
135 | bool running; | ||
136 | |||
137 | /* Device-specific data. */ | ||
138 | void *priv; | ||
139 | }; | ||
140 | |||
141 | /* The virtqueue structure describes a queue attached to a device. */ | ||
142 | struct virtqueue { | ||
143 | struct virtqueue *next; | ||
144 | |||
145 | /* Which device owns me. */ | ||
146 | struct device *dev; | ||
147 | |||
148 | /* The configuration for this queue. */ | ||
149 | struct lguest_vqconfig config; | ||
150 | |||
151 | /* The actual ring of buffers. */ | ||
152 | struct vring vring; | ||
153 | |||
154 | /* Last available index we saw. */ | ||
155 | u16 last_avail_idx; | ||
156 | |||
157 | /* How many are used since we sent last irq? */ | ||
158 | unsigned int pending_used; | ||
159 | |||
160 | /* Eventfd where Guest notifications arrive. */ | ||
161 | int eventfd; | ||
162 | |||
163 | /* Function for the thread which is servicing this virtqueue. */ | ||
164 | void (*service)(struct virtqueue *vq); | ||
165 | pid_t thread; | ||
166 | }; | ||
167 | |||
168 | /* Remember the arguments to the program so we can "reboot" */ | ||
169 | static char **main_args; | ||
170 | |||
171 | /* The original tty settings to restore on exit. */ | ||
172 | static struct termios orig_term; | ||
173 | |||
174 | /* | ||
175 | * We have to be careful with barriers: our devices are all run in separate | ||
176 | * threads and so we need to make sure that changes visible to the Guest happen | ||
177 | * in precise order. | ||
178 | */ | ||
179 | #define wmb() __asm__ __volatile__("" : : : "memory") | ||
180 | #define mb() __asm__ __volatile__("" : : : "memory") | ||
181 | |||
182 | /* | ||
183 | * Convert an iovec element to the given type. | ||
184 | * | ||
185 | * This is a fairly ugly trick: we need to know the size of the type and | ||
186 | * alignment requirement to check the pointer is kosher. It's also nice to | ||
187 | * have the name of the type in case we report failure. | ||
188 | * | ||
189 | * Typing those three things all the time is cumbersome and error prone, so we | ||
190 | * have a macro which sets them all up and passes to the real function. | ||
191 | */ | ||
192 | #define convert(iov, type) \ | ||
193 | ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) | ||
194 | |||
195 | static void *_convert(struct iovec *iov, size_t size, size_t align, | ||
196 | const char *name) | ||
197 | { | ||
198 | if (iov->iov_len != size) | ||
199 | errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); | ||
200 | if ((unsigned long)iov->iov_base % align != 0) | ||
201 | errx(1, "Bad alignment %p for %s", iov->iov_base, name); | ||
202 | return iov->iov_base; | ||
203 | } | ||
204 | |||
205 | /* Wrapper for the last available index. Makes it easier to change. */ | ||
206 | #define lg_last_avail(vq) ((vq)->last_avail_idx) | ||
207 | |||
208 | /* | ||
209 | * The virtio configuration space is defined to be little-endian. x86 is | ||
210 | * little-endian too, but it's nice to be explicit so we have these helpers. | ||
211 | */ | ||
212 | #define cpu_to_le16(v16) (v16) | ||
213 | #define cpu_to_le32(v32) (v32) | ||
214 | #define cpu_to_le64(v64) (v64) | ||
215 | #define le16_to_cpu(v16) (v16) | ||
216 | #define le32_to_cpu(v32) (v32) | ||
217 | #define le64_to_cpu(v64) (v64) | ||
218 | |||
219 | /* Is this iovec empty? */ | ||
220 | static bool iov_empty(const struct iovec iov[], unsigned int num_iov) | ||
221 | { | ||
222 | unsigned int i; | ||
223 | |||
224 | for (i = 0; i < num_iov; i++) | ||
225 | if (iov[i].iov_len) | ||
226 | return false; | ||
227 | return true; | ||
228 | } | ||
229 | |||
230 | /* Take len bytes from the front of this iovec. */ | ||
231 | static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) | ||
232 | { | ||
233 | unsigned int i; | ||
234 | |||
235 | for (i = 0; i < num_iov; i++) { | ||
236 | unsigned int used; | ||
237 | |||
238 | used = iov[i].iov_len < len ? iov[i].iov_len : len; | ||
239 | iov[i].iov_base += used; | ||
240 | iov[i].iov_len -= used; | ||
241 | len -= used; | ||
242 | } | ||
243 | assert(len == 0); | ||
244 | } | ||
245 | |||
246 | /* The device virtqueue descriptors are followed by feature bitmasks. */ | ||
247 | static u8 *get_feature_bits(struct device *dev) | ||
248 | { | ||
249 | return (u8 *)(dev->desc + 1) | ||
250 | + dev->num_vq * sizeof(struct lguest_vqconfig); | ||
251 | } | ||
252 | |||
253 | /*L:100 | ||
254 | * The Launcher code itself takes us out into userspace, that scary place where | ||
255 | * pointers run wild and free! Unfortunately, like most userspace programs, | ||
256 | * it's quite boring (which is why everyone likes to hack on the kernel!). | ||
257 | * Perhaps if you make up an Lguest Drinking Game at this point, it will get | ||
258 | * you through this section. Or, maybe not. | ||
259 | * | ||
260 | * The Launcher sets up a big chunk of memory to be the Guest's "physical" | ||
261 | * memory and stores it in "guest_base". In other words, Guest physical == | ||
262 | * Launcher virtual with an offset. | ||
263 | * | ||
264 | * This can be tough to get your head around, but usually it just means that we | ||
265 | * use these trivial conversion functions when the Guest gives us its | ||
266 | * "physical" addresses: | ||
267 | */ | ||
268 | static void *from_guest_phys(unsigned long addr) | ||
269 | { | ||
270 | return guest_base + addr; | ||
271 | } | ||
272 | |||
273 | static unsigned long to_guest_phys(const void *addr) | ||
274 | { | ||
275 | return (addr - guest_base); | ||
276 | } | ||
277 | |||
278 | /*L:130 | ||
279 | * Loading the Kernel. | ||
280 | * | ||
281 | * We start with a couple of simple helper routines. open_or_die() avoids | |
282 | * error-checking code cluttering the callers: | ||
283 | */ | ||
284 | static int open_or_die(const char *name, int flags) | ||
285 | { | ||
286 | int fd = open(name, flags); | ||
287 | if (fd < 0) | ||
288 | err(1, "Failed to open %s", name); | ||
289 | return fd; | ||
290 | } | ||
291 | |||
292 | /* map_zeroed_pages() takes a number of pages. */ | ||
293 | static void *map_zeroed_pages(unsigned int num) | ||
294 | { | ||
295 | int fd = open_or_die("/dev/zero", O_RDONLY); | ||
296 | void *addr; | ||
297 | |||
298 | /* | ||
299 | * We use a private mapping (ie. if we write to the page, it will be | ||
300 | * copied). We allocate an extra two pages PROT_NONE to act as guard | ||
301 | * pages against read/write attempts that exceed allocated space. | ||
302 | */ | ||
303 | addr = mmap(NULL, getpagesize() * (num+2), | ||
304 | PROT_NONE, MAP_PRIVATE, fd, 0); | ||
305 | |||
306 | if (addr == MAP_FAILED) | ||
307 | err(1, "Mmapping %u pages of /dev/zero", num); | ||
308 | |||
309 | if (mprotect(addr + getpagesize(), getpagesize() * num, | ||
310 | PROT_READ|PROT_WRITE) == -1) | ||
311 | err(1, "mprotect rw %u pages failed", num); | ||
312 | |||
313 | /* | ||
314 | * One neat mmap feature is that you can close the fd, and it | ||
315 | * stays mapped. | ||
316 | */ | ||
317 | close(fd); | ||
318 | |||
319 | /* Return address after PROT_NONE page */ | ||
320 | return addr + getpagesize(); | ||
321 | } | ||
322 | |||
323 | /* Get some more pages for a device. */ | ||
324 | static void *get_pages(unsigned int num) | ||
325 | { | ||
326 | void *addr = from_guest_phys(guest_limit); | ||
327 | |||
328 | guest_limit += num * getpagesize(); | ||
329 | if (guest_limit > guest_max) | ||
330 | errx(1, "Not enough memory for devices"); | ||
331 | return addr; | ||
332 | } | ||
333 | |||
334 | /* | ||
335 | * This routine is used to load the kernel or initrd. It tries mmap, but if | ||
336 | * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), | ||
337 | * it falls back to reading the memory in. | ||
338 | */ | ||
339 | static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) | ||
340 | { | ||
341 | ssize_t r; | ||
342 | |||
343 | /* | ||
344 | * We map writable even though some segments are marked read-only. | |
345 | * The kernel really wants to be writable: it patches its own | ||
346 | * instructions. | ||
347 | * | ||
348 | * MAP_PRIVATE means that the page won't be copied until a write is | ||
349 | * done to it. This allows us to share untouched memory between | ||
350 | * Guests. | ||
351 | */ | ||
352 | if (mmap(addr, len, PROT_READ|PROT_WRITE, | ||
353 | MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) | ||
354 | return; | ||
355 | |||
356 | /* pread does a seek and a read in one shot: saves a few lines. */ | ||
357 | r = pread(fd, addr, len, offset); | ||
358 | if (r != len) | ||
359 | err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * This routine takes an open vmlinux image, which is in ELF, and maps it into | ||
364 | * the Guest memory. ELF = Executable and Linkable Format, which is the format used | |
365 | * by all modern binaries on Linux including the kernel. | ||
366 | * | ||
367 | * The ELF headers give *two* addresses: a physical address, and a virtual | ||
368 | * address. We use the physical address; the Guest will map itself to the | ||
369 | * virtual address. | ||
370 | * | ||
371 | * We return the starting address. | ||
372 | */ | ||
373 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) | ||
374 | { | ||
375 | Elf32_Phdr phdr[ehdr->e_phnum]; | ||
376 | unsigned int i; | ||
377 | |||
378 | /* | ||
379 | * Sanity checks on the main ELF header: an x86 executable with a | ||
380 | * reasonable number of correctly-sized program headers. | ||
381 | */ | ||
382 | if (ehdr->e_type != ET_EXEC | ||
383 | || ehdr->e_machine != EM_386 | ||
384 | || ehdr->e_phentsize != sizeof(Elf32_Phdr) | ||
385 | || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) | ||
386 | errx(1, "Malformed elf header"); | ||
387 | |||
388 | /* | ||
389 | * An ELF executable contains an ELF header and a number of "program" | ||
390 | * headers which indicate which parts ("segments") of the program to | ||
391 | * load where. | ||
392 | */ | ||
393 | |||
394 | /* We read in all the program headers at once: */ | ||
395 | if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) | ||
396 | err(1, "Seeking to program headers"); | ||
397 | if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) | ||
398 | err(1, "Reading program headers"); | ||
399 | |||
400 | /* | ||
401 | * Try all the headers: there are usually only three. A read-only one, | ||
402 | * a read-write one, and a "note" section which we don't load. | ||
403 | */ | ||
404 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
405 | /* If this isn't a loadable segment, we ignore it */ | ||
406 | if (phdr[i].p_type != PT_LOAD) | ||
407 | continue; | ||
408 | |||
409 | verbose("Section %i: size %i addr %p\n", | ||
410 | i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); | ||
411 | |||
412 | /* We map this section of the file at its physical address. */ | ||
413 | map_at(elf_fd, from_guest_phys(phdr[i].p_paddr), | ||
414 | phdr[i].p_offset, phdr[i].p_filesz); | ||
415 | } | ||
416 | |||
417 | /* The entry point is given in the ELF header. */ | ||
418 | return ehdr->e_entry; | ||
419 | } | ||
420 | |||
421 | /*L:150 | ||
422 | * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed | ||
423 | * to jump into it and it will unpack itself. We used to have to perform some | ||
424 | * hairy magic because the unpacking code scared me. | ||
425 | * | ||
426 | * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote | ||
427 | * a small patch to jump over the tricky bits in the Guest, so now we just read | ||
428 | * the funky header so we know where in the file to load, and away we go! | ||
429 | */ | ||
430 | static unsigned long load_bzimage(int fd) | ||
431 | { | ||
432 | struct boot_params boot; | ||
433 | int r; | ||
434 | /* Modern bzImages get loaded at 1M. */ | ||
435 | void *p = from_guest_phys(0x100000); | ||
436 | |||
437 | /* | ||
438 | * Go back to the start of the file and read the header. It should be | ||
439 | * a Linux boot header (see Documentation/x86/boot.txt) | ||
440 | */ | ||
441 | lseek(fd, 0, SEEK_SET); | ||
442 | read(fd, &boot, sizeof(boot)); | ||
443 | |||
444 | /* Inside the setup_hdr, we expect the magic "HdrS" */ | ||
445 | if (memcmp(&boot.hdr.header, "HdrS", 4) != 0) | ||
446 | errx(1, "This doesn't look like a bzImage to me"); | ||
447 | |||
448 | /* Skip over the extra sectors of the header. */ | ||
449 | lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET); | ||
450 | |||
451 | /* Now read everything into memory, in nice big chunks. */ | |
452 | while ((r = read(fd, p, 65536)) > 0) | ||
453 | p += r; | ||
454 | |||
455 | /* Finally, code32_start tells us where to enter the kernel. */ | ||
456 | return boot.hdr.code32_start; | ||
457 | } | ||
458 | |||
459 | /*L:140 | ||
460 | * Loading the kernel is easy when it's a "vmlinux", but most kernels | ||
461 | * come wrapped up in the self-decompressing "bzImage" format. With a little | ||
462 | * work, we can load those, too. | ||
463 | */ | ||
464 | static unsigned long load_kernel(int fd) | ||
465 | { | ||
466 | Elf32_Ehdr hdr; | ||
467 | |||
468 | /* Read in the first few bytes. */ | ||
469 | if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) | ||
470 | err(1, "Reading kernel"); | ||
471 | |||
472 | /* If it's an ELF file, it starts with "\177ELF" */ | ||
473 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | ||
474 | return map_elf(fd, &hdr); | ||
475 | |||
476 | /* Otherwise we assume it's a bzImage, and try to load it. */ | ||
477 | return load_bzimage(fd); | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * This is a trivial little helper to align pages. Andi Kleen hated it because | ||
482 | * it calls getpagesize() twice: "it's dumb code." | ||
483 | * | ||
484 | * Kernel guys get really het up about optimization, even when it's not | ||
485 | * necessary. I leave this code as a reaction against that. | ||
486 | */ | ||
487 | static inline unsigned long page_align(unsigned long addr) | ||
488 | { | ||
489 | /* Add upwards and truncate downwards. */ | ||
490 | return ((addr + getpagesize()-1) & ~(getpagesize()-1)); | ||
491 | } | ||
492 | |||
493 | /*L:180 | ||
494 | * An "initial ram disk" is a disk image loaded into memory along with the | ||
495 | * kernel which the kernel can use to boot from without needing any drivers. | ||
496 | * Most distributions now use this as standard: the initrd contains the code to | ||
497 | * load the appropriate driver modules for the current machine. | ||
498 | * | ||
499 | * Importantly, James Morris works for RedHat, and Fedora uses initrds for its | ||
500 | * kernels. He sent me this (and tells me when I break it). | ||
501 | */ | ||
502 | static unsigned long load_initrd(const char *name, unsigned long mem) | ||
503 | { | ||
504 | int ifd; | ||
505 | struct stat st; | ||
506 | unsigned long len; | ||
507 | |||
508 | ifd = open_or_die(name, O_RDONLY); | ||
509 | /* fstat() is needed to get the file size. */ | ||
510 | if (fstat(ifd, &st) < 0) | ||
511 | err(1, "fstat() on initrd '%s'", name); | ||
512 | |||
513 | /* | ||
514 | * We map the initrd at the top of memory, but mmap wants it to be | ||
515 | * page-aligned, so we round the size up for that. | ||
516 | */ | ||
517 | len = page_align(st.st_size); | ||
518 | map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); | ||
519 | /* | ||
520 | * Once a file is mapped, you can close the file descriptor. It's a | ||
521 | * little odd, but quite useful. | ||
522 | */ | ||
523 | close(ifd); | ||
524 | verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); | ||
525 | |||
526 | /* We return the initrd size. */ | ||
527 | return len; | ||
528 | } | ||
529 | /*:*/ | ||
530 | |||
531 | /* | ||
532 | * Simple routine to roll all the commandline arguments together with spaces | ||
533 | * between them. | ||
534 | */ | ||
535 | static void concat(char *dst, char *args[]) | ||
536 | { | ||
537 | unsigned int i, len = 0; | ||
538 | |||
539 | for (i = 0; args[i]; i++) { | ||
540 | if (i) { | ||
541 | strcat(dst+len, " "); | ||
542 | len++; | ||
543 | } | ||
544 | strcpy(dst+len, args[i]); | ||
545 | len += strlen(args[i]); | ||
546 | } | ||
547 | /* In case it's empty. */ | ||
548 | dst[len] = '\0'; | ||
549 | } | ||
550 | |||
551 | /*L:185 | ||
552 | * This is where we actually tell the kernel to initialize the Guest. We | ||
553 | * saw the arguments it expects when we looked at initialize() in lguest_user.c: | ||
554 | * the base of Guest "physical" memory, the top physical page to allow and the | ||
555 | * entry point for the Guest. | ||
556 | */ | ||
557 | static void tell_kernel(unsigned long start) | ||
558 | { | ||
559 | unsigned long args[] = { LHREQ_INITIALIZE, | ||
560 | (unsigned long)guest_base, | ||
561 | guest_limit / getpagesize(), start }; | ||
562 | verbose("Guest: %p - %p (%#lx)\n", | ||
563 | guest_base, guest_base + guest_limit, guest_limit); | ||
564 | lguest_fd = open_or_die("/dev/lguest", O_RDWR); | ||
565 | if (write(lguest_fd, args, sizeof(args)) < 0) | ||
566 | err(1, "Writing to /dev/lguest"); | ||
567 | } | ||
568 | /*:*/ | ||
569 | |||
570 | /*L:200 | ||
571 | * Device Handling. | ||
572 | * | ||
573 | * When the Guest gives us a buffer, it sends an array of addresses and sizes. | ||
574 | * We need to make sure it's not trying to reach into the Launcher itself, so | ||
575 | * we have a convenient routine which checks it and exits with an error message | ||
576 | * if something funny is going on: | ||
577 | */ | ||
578 | static void *_check_pointer(unsigned long addr, unsigned int size, | ||
579 | unsigned int line) | ||
580 | { | ||
581 | /* | ||
582 | * Check if the requested address and size exceeds the allocated memory, | ||
583 | * or addr + size wraps around. | ||
584 | */ | ||
585 | if ((addr + size) > guest_limit || (addr + size) < addr) | ||
586 | errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); | ||
587 | /* | ||
588 | * We return a pointer for the caller's convenience, now we know it's | ||
589 | * safe to use. | ||
590 | */ | ||
591 | return from_guest_phys(addr); | ||
592 | } | ||
593 | /* A macro which transparently hands the line number to the real function. */ | ||
594 | #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) | ||
595 | |||
596 | /* | ||
597 | * Each buffer in the virtqueues is actually a chain of descriptors. This | ||
598 | * function returns the next descriptor in the chain, or max (the size of the | |
599 | * descriptor table being walked) if we're at the end. | |
600 | */ | ||
601 | static unsigned next_desc(struct vring_desc *desc, | ||
602 | unsigned int i, unsigned int max) | ||
603 | { | ||
604 | unsigned int next; | ||
605 | |||
606 | /* If this descriptor says it doesn't chain, we're done. */ | ||
607 | if (!(desc[i].flags & VRING_DESC_F_NEXT)) | ||
608 | return max; | ||
609 | |||
610 | /* Check they're not leading us off end of descriptors. */ | ||
611 | next = desc[i].next; | ||
612 | /* Make sure compiler knows to grab that: we don't want it changing! */ | ||
613 | wmb(); | ||
614 | |||
615 | if (next >= max) | ||
616 | errx(1, "Desc next is %u", next); | ||
617 | |||
618 | return next; | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * This actually sends the interrupt for this virtqueue, if we've used a | ||
623 | * buffer. | ||
624 | */ | ||
625 | static void trigger_irq(struct virtqueue *vq) | ||
626 | { | ||
627 | unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; | ||
628 | |||
629 | /* Don't inform them if nothing used. */ | ||
630 | if (!vq->pending_used) | ||
631 | return; | ||
632 | vq->pending_used = 0; | ||
633 | |||
634 | /* If they don't want an interrupt, don't send one... */ | ||
635 | if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | /* Send the Guest an interrupt to tell them we used something up. */ | |
640 | if (write(lguest_fd, buf, sizeof(buf)) != 0) | ||
641 | err(1, "Triggering irq %i", vq->config.irq); | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * This looks in the virtqueue for the first available buffer, and converts | ||
646 | * it to an iovec for convenient access. Since descriptors consist of some | ||
647 | * number of output then some number of input descriptors, it's actually two | ||
648 | * iovecs, but we pack them into one and note how many of each there were. | ||
649 | * | ||
650 | * This function waits if necessary, and returns the descriptor number found. | ||
651 | */ | ||
652 | static unsigned wait_for_vq_desc(struct virtqueue *vq, | ||
653 | struct iovec iov[], | ||
654 | unsigned int *out_num, unsigned int *in_num) | ||
655 | { | ||
656 | unsigned int i, head, max; | ||
657 | struct vring_desc *desc; | ||
658 | u16 last_avail = lg_last_avail(vq); | ||
659 | |||
660 | /* There's nothing available? */ | ||
661 | while (last_avail == vq->vring.avail->idx) { | ||
662 | u64 event; | ||
663 | |||
664 | /* | ||
665 | * Since we're about to sleep, now is a good time to tell the | ||
666 | * Guest about what we've used up to now. | ||
667 | */ | ||
668 | trigger_irq(vq); | ||
669 | |||
670 | /* OK, now we need to know about added descriptors. */ | ||
671 | vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; | ||
672 | |||
673 | /* | ||
674 | * They could have slipped one in as we were doing that: make | ||
675 | * sure it's written, then check again. | ||
676 | */ | ||
677 | mb(); | ||
678 | if (last_avail != vq->vring.avail->idx) { | ||
679 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
680 | break; | ||
681 | } | ||
682 | |||
683 | /* Nothing new? Wait for eventfd to tell us they refilled. */ | ||
684 | if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event)) | ||
685 | errx(1, "Event read failed?"); | ||
686 | |||
687 | /* We don't need to be notified again. */ | ||
688 | vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; | ||
689 | } | ||
690 | |||
691 | /* Check it isn't doing very strange things with descriptor numbers. */ | ||
692 | if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) | ||
693 | errx(1, "Guest moved used index from %u to %u", | ||
694 | last_avail, vq->vring.avail->idx); | ||
695 | |||
696 | /* | ||
697 | * Grab the next descriptor number they're advertising, and increment | ||
698 | * the index we've seen. | ||
699 | */ | ||
700 | head = vq->vring.avail->ring[last_avail % vq->vring.num]; | ||
701 | lg_last_avail(vq)++; | ||
702 | |||
703 | /* If their number is silly, that's a fatal mistake. */ | ||
704 | if (head >= vq->vring.num) | ||
705 | errx(1, "Guest says index %u is available", head); | ||
706 | |||
707 | /* When we start there are none of either input nor output. */ | ||
708 | *out_num = *in_num = 0; | ||
709 | |||
710 | max = vq->vring.num; | ||
711 | desc = vq->vring.desc; | ||
712 | i = head; | ||
713 | |||
714 | /* | ||
715 | * If this is an indirect entry, then this buffer contains a descriptor | ||
716 | * table which we handle as if it's any normal descriptor chain. | ||
717 | */ | ||
718 | if (desc[i].flags & VRING_DESC_F_INDIRECT) { | ||
719 | if (desc[i].len % sizeof(struct vring_desc)) | ||
720 | errx(1, "Invalid size for indirect buffer table"); | ||
721 | |||
722 | max = desc[i].len / sizeof(struct vring_desc); | ||
723 | desc = check_pointer(desc[i].addr, desc[i].len); | ||
724 | i = 0; | ||
725 | } | ||
726 | |||
727 | do { | ||
728 | /* Grab the first descriptor, and check it's OK. */ | ||
729 | iov[*out_num + *in_num].iov_len = desc[i].len; | ||
730 | iov[*out_num + *in_num].iov_base | ||
731 | = check_pointer(desc[i].addr, desc[i].len); | ||
732 | /* If this is an input descriptor, increment that count. */ | ||
733 | if (desc[i].flags & VRING_DESC_F_WRITE) | ||
734 | (*in_num)++; | ||
735 | else { | ||
736 | /* | ||
737 | * If it's an output descriptor, they're all supposed | ||
738 | * to come before any input descriptors. | ||
739 | */ | ||
740 | if (*in_num) | ||
741 | errx(1, "Descriptor has out after in"); | ||
742 | (*out_num)++; | ||
743 | } | ||
744 | |||
745 | /* If we've got too many, that implies a descriptor loop. */ | ||
746 | if (*out_num + *in_num > max) | ||
747 | errx(1, "Looped descriptor"); | ||
748 | } while ((i = next_desc(desc, i, max)) != max); | ||
749 | |||
750 | return head; | ||
751 | } | ||
752 | |||
753 | /* | ||
754 | * After we've used one of their buffers, we tell the Guest about it. Sometime | ||
755 | * later we'll want to send them an interrupt using trigger_irq(); note that | ||
756 | * wait_for_vq_desc() does that for us if it has to wait. | ||
757 | */ | ||
758 | static void add_used(struct virtqueue *vq, unsigned int head, int len) | ||
759 | { | ||
760 | struct vring_used_elem *used; | ||
761 | |||
762 | /* | ||
763 | * The virtqueue contains a ring of used buffers. Get a pointer to the | ||
764 | * next entry in that used ring. | ||
765 | */ | ||
766 | used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; | ||
767 | used->id = head; | ||
768 | used->len = len; | ||
769 | /* Make sure buffer is written before we update index. */ | ||
770 | wmb(); | ||
771 | vq->vring.used->idx++; | ||
772 | vq->pending_used++; | ||
773 | } | ||
774 | |||
/* Convenience wrapper: publish the used buffer, then call trigger_irq(). */
static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
{
	add_used(vq, head, len);
	trigger_irq(vq);
}
781 | |||
/*
 * The Console
 *
 * State kept for the console's "triple ^C" exit hack (see console_input()).
 */
struct console_abort {
	/* Number of consecutive ^C characters counted so far. */
	int count;
	/* Time at which the first ^C of the current run was seen. */
	struct timeval start;
};
793 | |||
794 | /* This is the routine which handles console input (ie. stdin). */ | ||
795 | static void console_input(struct virtqueue *vq) | ||
796 | { | ||
797 | int len; | ||
798 | unsigned int head, in_num, out_num; | ||
799 | struct console_abort *abort = vq->dev->priv; | ||
800 | struct iovec iov[vq->vring.num]; | ||
801 | |||
802 | /* Make sure there's a descriptor available. */ | ||
803 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | ||
804 | if (out_num) | ||
805 | errx(1, "Output buffers in console in queue?"); | ||
806 | |||
807 | /* Read into it. This is where we usually wait. */ | ||
808 | len = readv(STDIN_FILENO, iov, in_num); | ||
809 | if (len <= 0) { | ||
810 | /* Ran out of input? */ | ||
811 | warnx("Failed to get console input, ignoring console."); | ||
812 | /* | ||
813 | * For simplicity, dying threads kill the whole Launcher. So | ||
814 | * just nap here. | ||
815 | */ | ||
816 | for (;;) | ||
817 | pause(); | ||
818 | } | ||
819 | |||
820 | /* Tell the Guest we used a buffer. */ | ||
821 | add_used_and_trigger(vq, head, len); | ||
822 | |||
823 | /* | ||
824 | * Three ^C within one second? Exit. | ||
825 | * | ||
826 | * This is such a hack, but works surprisingly well. Each ^C has to | ||
827 | * be in a buffer by itself, so they can't be too fast. But we check | ||
828 | * that we get three within about a second, so they can't be too | ||
829 | * slow. | ||
830 | */ | ||
831 | if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { | ||
832 | abort->count = 0; | ||
833 | return; | ||
834 | } | ||
835 | |||
836 | abort->count++; | ||
837 | if (abort->count == 1) | ||
838 | gettimeofday(&abort->start, NULL); | ||
839 | else if (abort->count == 3) { | ||
840 | struct timeval now; | ||
841 | gettimeofday(&now, NULL); | ||
842 | /* Kill all Launcher processes with SIGINT, like normal ^C */ | ||
843 | if (now.tv_sec <= abort->start.tv_sec+1) | ||
844 | kill(0, SIGINT); | ||
845 | abort->count = 0; | ||
846 | } | ||
847 | } | ||
848 | |||
849 | /* This is the routine which handles console output (ie. stdout). */ | ||
850 | static void console_output(struct virtqueue *vq) | ||
851 | { | ||
852 | unsigned int head, out, in; | ||
853 | struct iovec iov[vq->vring.num]; | ||
854 | |||
855 | /* We usually wait in here, for the Guest to give us something. */ | ||
856 | head = wait_for_vq_desc(vq, iov, &out, &in); | ||
857 | if (in) | ||
858 | errx(1, "Input buffers in console output queue?"); | ||
859 | |||
860 | /* writev can return a partial write, so we loop here. */ | ||
861 | while (!iov_empty(iov, out)) { | ||
862 | int len = writev(STDOUT_FILENO, iov, out); | ||
863 | if (len <= 0) { | ||
864 | warn("Write to stdout gave %i (%d)", len, errno); | ||
865 | break; | ||
866 | } | ||
867 | iov_consume(iov, out, len); | ||
868 | } | ||
869 | |||
870 | /* | ||
871 | * We're finished with that buffer: if we're going to sleep, | ||
872 | * wait_for_vq_desc() will prod the Guest with an interrupt. | ||
873 | */ | ||
874 | add_used(vq, head, 0); | ||
875 | } | ||
876 | |||
/*
 * The Network
 *
 * Network output is simple too: every output buffer the Guest gives us is
 * written to /dev/net/tun.  This per-device state hangs off dev->priv.
 */
struct net_info {
	/* File descriptor returned by get_tun_device(). */
	int tunfd;
};
886 | |||
887 | static void net_output(struct virtqueue *vq) | ||
888 | { | ||
889 | struct net_info *net_info = vq->dev->priv; | ||
890 | unsigned int head, out, in; | ||
891 | struct iovec iov[vq->vring.num]; | ||
892 | |||
893 | /* We usually wait in here for the Guest to give us a packet. */ | ||
894 | head = wait_for_vq_desc(vq, iov, &out, &in); | ||
895 | if (in) | ||
896 | errx(1, "Input buffers in net output queue?"); | ||
897 | /* | ||
898 | * Send the whole thing through to /dev/net/tun. It expects the exact | ||
899 | * same format: what a coincidence! | ||
900 | */ | ||
901 | if (writev(net_info->tunfd, iov, out) < 0) | ||
902 | warnx("Write to tun failed (%d)?", errno); | ||
903 | |||
904 | /* | ||
905 | * Done with that one; wait_for_vq_desc() will send the interrupt if | ||
906 | * all packets are processed. | ||
907 | */ | ||
908 | add_used(vq, head, 0); | ||
909 | } | ||
910 | |||
/*
 * Handling network input is a bit trickier, because I've tried to optimize it.
 *
 * First we have a helper routine which tells us whether reading from this
 * file descriptor (ie. the /dev/net/tun device) would block: it polls with
 * select() and a zero timeout, and reports true unless exactly one
 * descriptor is ready.
 */
static bool will_block(int fd)
{
	struct timeval no_wait = { 0, 0 };
	fd_set readable;
	int ready;

	FD_ZERO(&readable);
	FD_SET(fd, &readable);
	ready = select(fd + 1, &readable, NULL, NULL, &no_wait);

	return ready != 1;
}
925 | |||
926 | /* | ||
927 | * This handles packets coming in from the tun device to our Guest. Like all | ||
928 | * service routines, it gets called again as soon as it returns, so you don't | ||
929 | * see a while(1) loop here. | ||
930 | */ | ||
931 | static void net_input(struct virtqueue *vq) | ||
932 | { | ||
933 | int len; | ||
934 | unsigned int head, out, in; | ||
935 | struct iovec iov[vq->vring.num]; | ||
936 | struct net_info *net_info = vq->dev->priv; | ||
937 | |||
938 | /* | ||
939 | * Get a descriptor to write an incoming packet into. This will also | ||
940 | * send an interrupt if they're out of descriptors. | ||
941 | */ | ||
942 | head = wait_for_vq_desc(vq, iov, &out, &in); | ||
943 | if (out) | ||
944 | errx(1, "Output buffers in net input queue?"); | ||
945 | |||
946 | /* | ||
947 | * If it looks like we'll block reading from the tun device, send them | ||
948 | * an interrupt. | ||
949 | */ | ||
950 | if (vq->pending_used && will_block(net_info->tunfd)) | ||
951 | trigger_irq(vq); | ||
952 | |||
953 | /* | ||
954 | * Read in the packet. This is where we normally wait (when there's no | ||
955 | * incoming network traffic). | ||
956 | */ | ||
957 | len = readv(net_info->tunfd, iov, in); | ||
958 | if (len <= 0) | ||
959 | warn("Failed to read from tun (%d).", errno); | ||
960 | |||
961 | /* | ||
962 | * Mark that packet buffer as used, but don't interrupt here. We want | ||
963 | * to wait until we've done as much work as we can. | ||
964 | */ | ||
965 | add_used(vq, head, len); | ||
966 | } | ||
967 | /*:*/ | ||
968 | |||
969 | /* This is the helper to create threads: run the service routine in a loop. */ | ||
970 | static int do_thread(void *_vq) | ||
971 | { | ||
972 | struct virtqueue *vq = _vq; | ||
973 | |||
974 | for (;;) | ||
975 | vq->service(vq); | ||
976 | return 0; | ||
977 | } | ||
978 | |||
/*
 * SIGCHLD handler: when a child dies, the whole process group gets SIGTERM.
 * As a side effect the shell restores the console for us!
 */
static void kill_launcher(int signal)
{
	/* Process ID 0 means "every process in our process group". */
	kill(0, SIGTERM);
}
987 | |||
988 | static void reset_device(struct device *dev) | ||
989 | { | ||
990 | struct virtqueue *vq; | ||
991 | |||
992 | verbose("Resetting device %s\n", dev->name); | ||
993 | |||
994 | /* Clear any features they've acked. */ | ||
995 | memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); | ||
996 | |||
997 | /* We're going to be explicitly killing threads, so ignore them. */ | ||
998 | signal(SIGCHLD, SIG_IGN); | ||
999 | |||
1000 | /* Zero out the virtqueues, get rid of their threads */ | ||
1001 | for (vq = dev->vq; vq; vq = vq->next) { | ||
1002 | if (vq->thread != (pid_t)-1) { | ||
1003 | kill(vq->thread, SIGTERM); | ||
1004 | waitpid(vq->thread, NULL, 0); | ||
1005 | vq->thread = (pid_t)-1; | ||
1006 | } | ||
1007 | memset(vq->vring.desc, 0, | ||
1008 | vring_size(vq->config.num, LGUEST_VRING_ALIGN)); | ||
1009 | lg_last_avail(vq) = 0; | ||
1010 | } | ||
1011 | dev->running = false; | ||
1012 | |||
1013 | /* Now we care if threads die. */ | ||
1014 | signal(SIGCHLD, (void *)kill_launcher); | ||
1015 | } | ||
1016 | |||
1017 | /*L:216 | ||
1018 | * This actually creates the thread which services the virtqueue for a device. | ||
1019 | */ | ||
1020 | static void create_thread(struct virtqueue *vq) | ||
1021 | { | ||
1022 | /* | ||
1023 | * Create stack for thread. Since the stack grows upwards, we point | ||
1024 | * the stack pointer to the end of this region. | ||
1025 | */ | ||
1026 | char *stack = malloc(32768); | ||
1027 | unsigned long args[] = { LHREQ_EVENTFD, | ||
1028 | vq->config.pfn*getpagesize(), 0 }; | ||
1029 | |||
1030 | /* Create a zero-initialized eventfd. */ | ||
1031 | vq->eventfd = eventfd(0, 0); | ||
1032 | if (vq->eventfd < 0) | ||
1033 | err(1, "Creating eventfd"); | ||
1034 | args[2] = vq->eventfd; | ||
1035 | |||
1036 | /* | ||
1037 | * Attach an eventfd to this virtqueue: it will go off when the Guest | ||
1038 | * does an LHCALL_NOTIFY for this vq. | ||
1039 | */ | ||
1040 | if (write(lguest_fd, &args, sizeof(args)) != 0) | ||
1041 | err(1, "Attaching eventfd"); | ||
1042 | |||
1043 | /* | ||
1044 | * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so | ||
1045 | * we get a signal if it dies. | ||
1046 | */ | ||
1047 | vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); | ||
1048 | if (vq->thread == (pid_t)-1) | ||
1049 | err(1, "Creating clone"); | ||
1050 | |||
1051 | /* We close our local copy now the child has it. */ | ||
1052 | close(vq->eventfd); | ||
1053 | } | ||
1054 | |||
1055 | static void start_device(struct device *dev) | ||
1056 | { | ||
1057 | unsigned int i; | ||
1058 | struct virtqueue *vq; | ||
1059 | |||
1060 | verbose("Device %s OK: offered", dev->name); | ||
1061 | for (i = 0; i < dev->feature_len; i++) | ||
1062 | verbose(" %02x", get_feature_bits(dev)[i]); | ||
1063 | verbose(", accepted"); | ||
1064 | for (i = 0; i < dev->feature_len; i++) | ||
1065 | verbose(" %02x", get_feature_bits(dev) | ||
1066 | [dev->feature_len+i]); | ||
1067 | |||
1068 | for (vq = dev->vq; vq; vq = vq->next) { | ||
1069 | if (vq->service) | ||
1070 | create_thread(vq); | ||
1071 | } | ||
1072 | dev->running = true; | ||
1073 | } | ||
1074 | |||
1075 | static void cleanup_devices(void) | ||
1076 | { | ||
1077 | struct device *dev; | ||
1078 | |||
1079 | for (dev = devices.dev; dev; dev = dev->next) | ||
1080 | reset_device(dev); | ||
1081 | |||
1082 | /* If we saved off the original terminal settings, restore them now. */ | ||
1083 | if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) | ||
1084 | tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); | ||
1085 | } | ||
1086 | |||
1087 | /* When the Guest tells us they updated the status field, we handle it. */ | ||
1088 | static void update_device_status(struct device *dev) | ||
1089 | { | ||
1090 | /* A zero status is a reset, otherwise it's a set of flags. */ | ||
1091 | if (dev->desc->status == 0) | ||
1092 | reset_device(dev); | ||
1093 | else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | ||
1094 | warnx("Device %s configuration FAILED", dev->name); | ||
1095 | if (dev->running) | ||
1096 | reset_device(dev); | ||
1097 | } else { | ||
1098 | if (dev->running) | ||
1099 | err(1, "Device %s features finalized twice", dev->name); | ||
1100 | start_device(dev); | ||
1101 | } | ||
1102 | } | ||
1103 | |||
1104 | /*L:215 | ||
1105 | * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In | ||
1106 | * particular, it's used to notify us of device status changes during boot. | ||
1107 | */ | ||
1108 | static void handle_output(unsigned long addr) | ||
1109 | { | ||
1110 | struct device *i; | ||
1111 | |||
1112 | /* Check each device. */ | ||
1113 | for (i = devices.dev; i; i = i->next) { | ||
1114 | struct virtqueue *vq; | ||
1115 | |||
1116 | /* | ||
1117 | * Notifications to device descriptors mean they updated the | ||
1118 | * device status. | ||
1119 | */ | ||
1120 | if (from_guest_phys(addr) == i->desc) { | ||
1121 | update_device_status(i); | ||
1122 | return; | ||
1123 | } | ||
1124 | |||
1125 | /* Devices should not be used before features are finalized. */ | ||
1126 | for (vq = i->vq; vq; vq = vq->next) { | ||
1127 | if (addr != vq->config.pfn*getpagesize()) | ||
1128 | continue; | ||
1129 | errx(1, "Notification on %s before setup!", i->name); | ||
1130 | } | ||
1131 | } | ||
1132 | |||
1133 | /* | ||
1134 | * Early console write is done using notify on a nul-terminated string | ||
1135 | * in Guest memory. It's also great for hacking debugging messages | ||
1136 | * into a Guest. | ||
1137 | */ | ||
1138 | if (addr >= guest_limit) | ||
1139 | errx(1, "Bad NOTIFY %#lx", addr); | ||
1140 | |||
1141 | write(STDOUT_FILENO, from_guest_phys(addr), | ||
1142 | strnlen(from_guest_phys(addr), guest_limit - addr)); | ||
1143 | } | ||
1144 | |||
1145 | /*L:190 | ||
1146 | * Device Setup | ||
1147 | * | ||
1148 | * All devices need a descriptor so the Guest knows it exists, and a "struct | ||
1149 | * device" so the Launcher can keep track of it. We have common helper | ||
1150 | * routines to allocate and manage them. | ||
1151 | */ | ||
1152 | |||
1153 | /* | ||
1154 | * The layout of the device page is a "struct lguest_device_desc" followed by a | ||
1155 | * number of virtqueue descriptors, then two sets of feature bits, then an | ||
1156 | * array of configuration bytes. This routine returns the configuration | ||
1157 | * pointer. | ||
1158 | */ | ||
1159 | static u8 *device_config(const struct device *dev) | ||
1160 | { | ||
1161 | return (void *)(dev->desc + 1) | ||
1162 | + dev->num_vq * sizeof(struct lguest_vqconfig) | ||
1163 | + dev->feature_len * 2; | ||
1164 | } | ||
1165 | |||
1166 | /* | ||
1167 | * This routine allocates a new "struct lguest_device_desc" from descriptor | ||
1168 | * table page just above the Guest's normal memory. It returns a pointer to | ||
1169 | * that descriptor. | ||
1170 | */ | ||
1171 | static struct lguest_device_desc *new_dev_desc(u16 type) | ||
1172 | { | ||
1173 | struct lguest_device_desc d = { .type = type }; | ||
1174 | void *p; | ||
1175 | |||
1176 | /* Figure out where the next device config is, based on the last one. */ | ||
1177 | if (devices.lastdev) | ||
1178 | p = device_config(devices.lastdev) | ||
1179 | + devices.lastdev->desc->config_len; | ||
1180 | else | ||
1181 | p = devices.descpage; | ||
1182 | |||
1183 | /* We only have one page for all the descriptors. */ | ||
1184 | if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) | ||
1185 | errx(1, "Too many devices"); | ||
1186 | |||
1187 | /* p might not be aligned, so we memcpy in. */ | ||
1188 | return memcpy(p, &d, sizeof(d)); | ||
1189 | } | ||
1190 | |||
1191 | /* | ||
1192 | * Each device descriptor is followed by the description of its virtqueues. We | ||
1193 | * specify how many descriptors the virtqueue is to have. | ||
1194 | */ | ||
1195 | static void add_virtqueue(struct device *dev, unsigned int num_descs, | ||
1196 | void (*service)(struct virtqueue *)) | ||
1197 | { | ||
1198 | unsigned int pages; | ||
1199 | struct virtqueue **i, *vq = malloc(sizeof(*vq)); | ||
1200 | void *p; | ||
1201 | |||
1202 | /* First we need some memory for this virtqueue. */ | ||
1203 | pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) | ||
1204 | / getpagesize(); | ||
1205 | p = get_pages(pages); | ||
1206 | |||
1207 | /* Initialize the virtqueue */ | ||
1208 | vq->next = NULL; | ||
1209 | vq->last_avail_idx = 0; | ||
1210 | vq->dev = dev; | ||
1211 | |||
1212 | /* | ||
1213 | * This is the routine the service thread will run, and its Process ID | ||
1214 | * once it's running. | ||
1215 | */ | ||
1216 | vq->service = service; | ||
1217 | vq->thread = (pid_t)-1; | ||
1218 | |||
1219 | /* Initialize the configuration. */ | ||
1220 | vq->config.num = num_descs; | ||
1221 | vq->config.irq = devices.next_irq++; | ||
1222 | vq->config.pfn = to_guest_phys(p) / getpagesize(); | ||
1223 | |||
1224 | /* Initialize the vring. */ | ||
1225 | vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); | ||
1226 | |||
1227 | /* | ||
1228 | * Append virtqueue to this device's descriptor. We use | ||
1229 | * device_config() to get the end of the device's current virtqueues; | ||
1230 | * we check that we haven't added any config or feature information | ||
1231 | * yet, otherwise we'd be overwriting them. | ||
1232 | */ | ||
1233 | assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); | ||
1234 | memcpy(device_config(dev), &vq->config, sizeof(vq->config)); | ||
1235 | dev->num_vq++; | ||
1236 | dev->desc->num_vq++; | ||
1237 | |||
1238 | verbose("Virtqueue page %#lx\n", to_guest_phys(p)); | ||
1239 | |||
1240 | /* | ||
1241 | * Add to tail of list, so dev->vq is first vq, dev->vq->next is | ||
1242 | * second. | ||
1243 | */ | ||
1244 | for (i = &dev->vq; *i; i = &(*i)->next); | ||
1245 | *i = vq; | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * The first half of the feature bitmask is for us to advertise features. The | ||
1250 | * second half is for the Guest to accept features. | ||
1251 | */ | ||
1252 | static void add_feature(struct device *dev, unsigned bit) | ||
1253 | { | ||
1254 | u8 *features = get_feature_bits(dev); | ||
1255 | |||
1256 | /* We can't extend the feature bits once we've added config bytes */ | ||
1257 | if (dev->desc->feature_len <= bit / CHAR_BIT) { | ||
1258 | assert(dev->desc->config_len == 0); | ||
1259 | dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; | ||
1260 | } | ||
1261 | |||
1262 | features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); | ||
1263 | } | ||
1264 | |||
1265 | /* | ||
1266 | * This routine sets the configuration fields for an existing device's | ||
1267 | * descriptor. It only works for the last device, but that's OK because that's | ||
1268 | * how we use it. | ||
1269 | */ | ||
1270 | static void set_config(struct device *dev, unsigned len, const void *conf) | ||
1271 | { | ||
1272 | /* Check we haven't overflowed our single page. */ | ||
1273 | if (device_config(dev) + len > devices.descpage + getpagesize()) | ||
1274 | errx(1, "Too many devices"); | ||
1275 | |||
1276 | /* Copy in the config information, and store the length. */ | ||
1277 | memcpy(device_config(dev), conf, len); | ||
1278 | dev->desc->config_len = len; | ||
1279 | |||
1280 | /* Size must fit in config_len field (8 bits)! */ | ||
1281 | assert(dev->desc->config_len == len); | ||
1282 | } | ||
1283 | |||
1284 | /* | ||
1285 | * This routine does all the creation and setup of a new device, including | ||
1286 | * calling new_dev_desc() to allocate the descriptor and device memory. We | ||
1287 | * don't actually start the service threads until later. | ||
1288 | * | ||
1289 | * See what I mean about userspace being boring? | ||
1290 | */ | ||
1291 | static struct device *new_device(const char *name, u16 type) | ||
1292 | { | ||
1293 | struct device *dev = malloc(sizeof(*dev)); | ||
1294 | |||
1295 | /* Now we populate the fields one at a time. */ | ||
1296 | dev->desc = new_dev_desc(type); | ||
1297 | dev->name = name; | ||
1298 | dev->vq = NULL; | ||
1299 | dev->feature_len = 0; | ||
1300 | dev->num_vq = 0; | ||
1301 | dev->running = false; | ||
1302 | |||
1303 | /* | ||
1304 | * Append to device list. Prepending to a single-linked list is | ||
1305 | * easier, but the user expects the devices to be arranged on the bus | ||
1306 | * in command-line order. The first network device on the command line | ||
1307 | * is eth0, the first block device /dev/vda, etc. | ||
1308 | */ | ||
1309 | if (devices.lastdev) | ||
1310 | devices.lastdev->next = dev; | ||
1311 | else | ||
1312 | devices.dev = dev; | ||
1313 | devices.lastdev = dev; | ||
1314 | |||
1315 | return dev; | ||
1316 | } | ||
1317 | |||
1318 | /* | ||
1319 | * Our first setup routine is the console. It's a fairly simple device, but | ||
1320 | * UNIX tty handling makes it uglier than it could be. | ||
1321 | */ | ||
1322 | static void setup_console(void) | ||
1323 | { | ||
1324 | struct device *dev; | ||
1325 | |||
1326 | /* If we can save the initial standard input settings... */ | ||
1327 | if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { | ||
1328 | struct termios term = orig_term; | ||
1329 | /* | ||
1330 | * Then we turn off echo, line buffering and ^C etc: We want a | ||
1331 | * raw input stream to the Guest. | ||
1332 | */ | ||
1333 | term.c_lflag &= ~(ISIG|ICANON|ECHO); | ||
1334 | tcsetattr(STDIN_FILENO, TCSANOW, &term); | ||
1335 | } | ||
1336 | |||
1337 | dev = new_device("console", VIRTIO_ID_CONSOLE); | ||
1338 | |||
1339 | /* We store the console state in dev->priv, and initialize it. */ | ||
1340 | dev->priv = malloc(sizeof(struct console_abort)); | ||
1341 | ((struct console_abort *)dev->priv)->count = 0; | ||
1342 | |||
1343 | /* | ||
1344 | * The console needs two virtqueues: the input then the output. When | ||
1345 | * they put something the input queue, we make sure we're listening to | ||
1346 | * stdin. When they put something in the output queue, we write it to | ||
1347 | * stdout. | ||
1348 | */ | ||
1349 | add_virtqueue(dev, VIRTQUEUE_NUM, console_input); | ||
1350 | add_virtqueue(dev, VIRTQUEUE_NUM, console_output); | ||
1351 | |||
1352 | verbose("device %u: console\n", ++devices.device_num); | ||
1353 | } | ||
1354 | /*:*/ | ||
1355 | |||
1356 | /*M:010 | ||
1357 | * Inter-guest networking is an interesting area. Simplest is to have a | ||
1358 | * --sharenet=<name> option which opens or creates a named pipe. This can be | ||
1359 | * used to send packets to another guest in a 1:1 manner. | ||
1360 | * | ||
1361 | * More sophisticated is to use one of the tools developed for projects like UML | ||
1362 | * to do networking. | ||
1363 | * | ||
1364 | * Faster is to do virtio bonding in kernel. Doing this 1:1 would be | ||
1365 | * completely generic ("here's my vring, attach to your vring") and would work | ||
1366 | * for any traffic. Of course, namespace and permissions issues need to be | ||
1367 | * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide | ||
1368 | * multiple inter-guest channels behind one interface, although it would | ||
1369 | * require some manner of hotplugging new virtio channels. | ||
1370 | * | ||
1371 | * Finally, we could use a virtio network switch in the kernel, ie. vhost. | ||
1372 | :*/ | ||
1373 | |||
1374 | static u32 str2ip(const char *ipaddr) | ||
1375 | { | ||
1376 | unsigned int b[4]; | ||
1377 | |||
1378 | if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) | ||
1379 | errx(1, "Failed to parse IP address '%s'", ipaddr); | ||
1380 | return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; | ||
1381 | } | ||
1382 | |||
/*
 * Parse a MAC address written as six colon-separated hex bytes into mac[].
 * Exits via errx() if six fields cannot be read.
 */
static void str2mac(const char *macaddr, unsigned char mac[6])
{
	unsigned int field[6];
	int k;

	if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x",
		   &field[0], &field[1], &field[2],
		   &field[3], &field[4], &field[5]) != 6)
		errx(1, "Failed to parse mac address '%s'", macaddr);

	/* Narrow each parsed field to a byte. */
	for (k = 0; k < 6; k++)
		mac[k] = field[k];
}
1396 | |||
1397 | /* | ||
1398 | * This code is "adapted" from libbridge: it attaches the Host end of the | ||
1399 | * network device to the bridge device specified by the command line. | ||
1400 | * | ||
1401 | * This is yet another James Morris contribution (I'm an IP-level guy, so I | ||
1402 | * dislike bridging), and I just try not to break it. | ||
1403 | */ | ||
1404 | static void add_to_bridge(int fd, const char *if_name, const char *br_name) | ||
1405 | { | ||
1406 | int ifidx; | ||
1407 | struct ifreq ifr; | ||
1408 | |||
1409 | if (!*br_name) | ||
1410 | errx(1, "must specify bridge name"); | ||
1411 | |||
1412 | ifidx = if_nametoindex(if_name); | ||
1413 | if (!ifidx) | ||
1414 | errx(1, "interface %s does not exist!", if_name); | ||
1415 | |||
1416 | strncpy(ifr.ifr_name, br_name, IFNAMSIZ); | ||
1417 | ifr.ifr_name[IFNAMSIZ-1] = '\0'; | ||
1418 | ifr.ifr_ifindex = ifidx; | ||
1419 | if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) | ||
1420 | err(1, "can't add %s to bridge %s", if_name, br_name); | ||
1421 | } | ||
1422 | |||
1423 | /* | ||
1424 | * This sets up the Host end of the network device with an IP address, brings | ||
1425 | * it up so packets will flow, the copies the MAC address into the hwaddr | ||
1426 | * pointer. | ||
1427 | */ | ||
1428 | static void configure_device(int fd, const char *tapif, u32 ipaddr) | ||
1429 | { | ||
1430 | struct ifreq ifr; | ||
1431 | struct sockaddr_in sin; | ||
1432 | |||
1433 | memset(&ifr, 0, sizeof(ifr)); | ||
1434 | strcpy(ifr.ifr_name, tapif); | ||
1435 | |||
1436 | /* Don't read these incantations. Just cut & paste them like I did! */ | ||
1437 | sin.sin_family = AF_INET; | ||
1438 | sin.sin_addr.s_addr = htonl(ipaddr); | ||
1439 | memcpy(&ifr.ifr_addr, &sin, sizeof(sin)); | ||
1440 | if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) | ||
1441 | err(1, "Setting %s interface address", tapif); | ||
1442 | ifr.ifr_flags = IFF_UP; | ||
1443 | if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) | ||
1444 | err(1, "Bringing interface %s up", tapif); | ||
1445 | } | ||
1446 | |||
1447 | static int get_tun_device(char tapif[IFNAMSIZ]) | ||
1448 | { | ||
1449 | struct ifreq ifr; | ||
1450 | int netfd; | ||
1451 | |||
1452 | /* Start with this zeroed. Messy but sure. */ | ||
1453 | memset(&ifr, 0, sizeof(ifr)); | ||
1454 | |||
1455 | /* | ||
1456 | * We open the /dev/net/tun device and tell it we want a tap device. A | ||
1457 | * tap device is like a tun device, only somehow different. To tell | ||
1458 | * the truth, I completely blundered my way through this code, but it | ||
1459 | * works now! | ||
1460 | */ | ||
1461 | netfd = open_or_die("/dev/net/tun", O_RDWR); | ||
1462 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; | ||
1463 | strcpy(ifr.ifr_name, "tap%d"); | ||
1464 | if (ioctl(netfd, TUNSETIFF, &ifr) != 0) | ||
1465 | err(1, "configuring /dev/net/tun"); | ||
1466 | |||
1467 | if (ioctl(netfd, TUNSETOFFLOAD, | ||
1468 | TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) | ||
1469 | err(1, "Could not set features for tun device"); | ||
1470 | |||
1471 | /* | ||
1472 | * We don't need checksums calculated for packets coming in this | ||
1473 | * device: trust us! | ||
1474 | */ | ||
1475 | ioctl(netfd, TUNSETNOCSUM, 1); | ||
1476 | |||
1477 | memcpy(tapif, ifr.ifr_name, IFNAMSIZ); | ||
1478 | return netfd; | ||
1479 | } | ||
1480 | |||
1481 | /*L:195 | ||
1482 | * Our network is a Host<->Guest network. This can either use bridging or | ||
1483 | * routing, but the principle is the same: it uses the "tun" device to inject | ||
1484 | * packets into the Host as if they came in from a normal network card. We | ||
1485 | * just shunt packets between the Guest and the tun device. | ||
1486 | */ | ||
1487 | static void setup_tun_net(char *arg) | ||
1488 | { | ||
1489 | struct device *dev; | ||
1490 | struct net_info *net_info = malloc(sizeof(*net_info)); /* NOTE(review): malloc result is dereferenced below without a NULL check */ | ||
1491 | int ipfd; | ||
1492 | u32 ip = INADDR_ANY; /* stays INADDR_ANY when bridging: str2ip() is only called in the non-bridge case */ | ||
1493 | bool bridging = false; | ||
1494 | char tapif[IFNAMSIZ], *p; | ||
1495 | struct virtio_net_config conf; /* NOTE(review): not initialized here; only conf.mac is ever written, and only when a MAC is given */ | ||
1496 | |||
1497 | net_info->tunfd = get_tun_device(tapif); /* presumably also stores the tap interface name in tapif - confirm */ | ||
1498 | |||
1499 | /* First we create a new network device; net_info hangs off dev->priv. */ | ||
1500 | dev = new_device("net", VIRTIO_ID_NET); | ||
1501 | dev->priv = net_info; | ||
1502 | |||
1503 | /* Network devices need a recv and a send queue, just like console. */ | ||
1504 | add_virtqueue(dev, VIRTQUEUE_NUM, net_input); | ||
1505 | add_virtqueue(dev, VIRTQUEUE_NUM, net_output); | ||
1506 | |||
1507 | /* | ||
1508 | * We need a socket to perform the magic network ioctls to bring up the | ||
1509 | * tap interface, connect to the bridge etc. Any socket will do! | ||
1510 | */ | ||
1511 | ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); | ||
1512 | if (ipfd < 0) | ||
1513 | err(1, "opening IP socket"); | ||
1514 | |||
1515 | /* If the command line was --tunnet=bridge:<name> do bridging: skip the prefix so arg starts at <name>. */ | ||
1516 | if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { | ||
1517 | arg += strlen(BRIDGE_PFX); | ||
1518 | bridging = true; | ||
1519 | } | ||
1520 | |||
1521 | /* A MAC address may follow the bridge name or IP address, after a ':'. The ':' is overwritten with NUL, so the caller's string is modified in place and arg ends at the name/address. */ | ||
1522 | p = strchr(arg, ':'); | ||
1523 | if (p) { | ||
1524 | str2mac(p+1, conf.mac); | ||
1525 | add_feature(dev, VIRTIO_NET_F_MAC); | ||
1526 | *p = '\0'; | ||
1527 | } | ||
1528 | |||
1529 | /* arg is now either an IP address or a bridge name */ | ||
1530 | if (bridging) | ||
1531 | add_to_bridge(ipfd, tapif, arg); | ||
1532 | else | ||
1533 | ip = str2ip(arg); | ||
1534 | |||
1535 | /* Set up the tun device. */ | ||
1536 | configure_device(ipfd, tapif, ip); | ||
1537 | |||
1538 | /* Expect Guest to handle everything except UFO */ | ||
1539 | add_feature(dev, VIRTIO_NET_F_CSUM); | ||
1540 | add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); | ||
1541 | add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); | ||
1542 | add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); | ||
1543 | add_feature(dev, VIRTIO_NET_F_GUEST_ECN); | ||
1544 | add_feature(dev, VIRTIO_NET_F_HOST_TSO4); | ||
1545 | add_feature(dev, VIRTIO_NET_F_HOST_TSO6); | ||
1546 | add_feature(dev, VIRTIO_NET_F_HOST_ECN); | ||
1547 | /* We handle indirect ring entries */ | ||
1548 | add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); | ||
1549 | set_config(dev, sizeof(conf), &conf); /* hands over all sizeof(conf) bytes, including any never written above */ | ||
1550 | |||
1551 | /* We don't need the socket any more; setup is done. */ | ||
1552 | close(ipfd); | ||
1553 | |||
1554 | devices.device_num++; /* incremented before the messages below, so they show the new count */ | ||
1555 | |||
1556 | if (bridging) | ||
1557 | verbose("device %u: tun %s attached to bridge: %s\n", | ||
1558 | devices.device_num, tapif, arg); | ||
1559 | else | ||
1560 | verbose("device %u: tun %s: %s\n", | ||
1561 | devices.device_num, tapif, arg); | ||
1562 | } | ||
1563 | /*:*/ | ||
1564 | |||
1565 | /* Per-disk state for the virtual block device. This hangs off device->priv. */ | ||
1566 | struct vblk_info { | ||
1567 | /* The size of the backing file in bytes (setup_block_file() takes it from lseek64(fd, 0, SEEK_END)). */ | ||
1568 | off64_t len; | ||
1569 | |||
1570 | /* The file descriptor for the backing file (opened O_RDWR|O_LARGEFILE by setup_block_file()). */ | ||
1571 | int fd; | ||
1572 | |||
1573 | }; | ||
1574 | |||
1575 | /*L:210 | ||
1576 | * The Disk | ||
1577 | * | ||
1578 | * The disk only has one virtqueue, so it only has one thread. It is really | ||
1579 | * simple: the Guest asks for a block number and we read or write that position | ||
1580 | * in the file. | ||
1581 | * | ||
1582 | * Before we serviced each virtqueue in a separate thread, that was unacceptably | ||
1583 | * slow: the Guest waits until the read is finished before running anything | ||
1584 | * else, even if it could have been doing useful work. | ||
1585 | * | ||
1586 | * We could have used async I/O, except it's reputed to suck so hard that | ||
1587 | * characters actually go missing from your code when you try to use it. | ||
1588 | */ | ||
1589 | static void blk_request(struct virtqueue *vq) | ||
1590 | { | ||
1591 | struct vblk_info *vblk = vq->dev->priv; | ||
1592 | unsigned int head, out_num, in_num, wlen; /* wlen: byte count handed to add_used() when the request completes */ | ||
1593 | int ret; | ||
1594 | u8 *in; /* points at the status byte (VIRTIO_BLK_S_*) we write back */ | ||
1595 | struct virtio_blk_outhdr *out; /* request header: its type and sector fields are read below */ | ||
1596 | struct iovec iov[vq->vring.num]; /* variable-length array with one slot per ring entry */ | ||
1597 | off64_t off; | ||
1598 | |||
1599 | /* | ||
1600 | * Get the next request, where we normally wait. It triggers the | ||
1601 | * interrupt to acknowledge previously serviced requests (if any). | ||
1602 | */ | ||
1603 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | ||
1604 | |||
1605 | /* | ||
1606 | * Every block request should contain at least one output buffer | ||
1607 | * (detailing the location on disk and the type of request) and one | ||
1608 | * input buffer (to hold the result). | ||
1609 | */ | ||
1610 | if (out_num == 0 || in_num == 0) | ||
1611 | errx(1, "Bad virtblk cmd %u out=%u in=%u", | ||
1612 | head, out_num, in_num); | ||
1613 | |||
1614 | out = convert(&iov[0], struct virtio_blk_outhdr); /* header is taken from the first buffer */ | ||
1615 | in = convert(&iov[out_num+in_num-1], u8); /* status byte is taken from the very last buffer */ | ||
1616 | /* | ||
1617 | * For historical reasons, block operations are expressed in 512 byte | ||
1618 | * "sectors". | ||
1619 | */ | ||
1620 | off = out->sector * 512; | ||
1621 | |||
1622 | /* | ||
1623 | * In general the virtio block driver is allowed to try SCSI commands. | ||
1624 | * It'd be nice if we supported eject, for example, but we don't. | ||
1625 | * The type bits are tested in order: SCSI, then write, then flush; anything else is treated as a read. | ||
1626 | */ | ||
1626 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { | ||
1627 | fprintf(stderr, "Scsi commands unsupported\n"); | ||
1628 | *in = VIRTIO_BLK_S_UNSUPP; | ||
1629 | wlen = sizeof(*in); | ||
1630 | } else if (out->type & VIRTIO_BLK_T_OUT) { | ||
1631 | /* | ||
1632 | * Write | ||
1633 | * | ||
1634 | * Move to the right location in the block file. This can fail | ||
1635 | * if they try to write past end. | ||
1636 | */ | ||
1637 | if (lseek64(vblk->fd, off, SEEK_SET) != off) | ||
1638 | err(1, "Bad seek to sector %llu", out->sector); | ||
1639 | |||
1640 | ret = writev(vblk->fd, iov+1, out_num-1); /* iov[0] is the header, so the data to write starts at iov+1 */ | ||
1641 | verbose("WRITE to sector %llu: %i\n", out->sector, ret); | ||
1642 | |||
1643 | /* | ||
1644 | * Grr... Now we know how long the descriptor they sent was, we | ||
1645 | * make sure they didn't try to write over the end of the block | ||
1646 | * file (possibly extending it). | ||
1647 | */ | ||
1648 | if (ret > 0 && off + ret > vblk->len) { | ||
1649 | /* Trim it back to the correct length */ | ||
1650 | ftruncate64(vblk->fd, vblk->len); | ||
1651 | /* Die, bad Guest, die. */ | ||
1652 | errx(1, "Write past end %llu+%u", off, ret); | ||
1653 | } | ||
1654 | |||
1655 | wlen = sizeof(*in); /* only the status byte is written back for a write */ | ||
1656 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | ||
1657 | } else if (out->type & VIRTIO_BLK_T_FLUSH) { | ||
1658 | /* Flush: push the file's data to stable storage with fdatasync(). */ | ||
1659 | ret = fdatasync(vblk->fd); | ||
1660 | verbose("FLUSH fdatasync: %i\n", ret); | ||
1661 | wlen = sizeof(*in); | ||
1662 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | ||
1663 | } else { | ||
1664 | /* | ||
1665 | * Read | ||
1666 | * | ||
1667 | * Move to the right location in the block file. This can fail | ||
1668 | * if they try to read past end. | ||
1669 | */ | ||
1670 | if (lseek64(vblk->fd, off, SEEK_SET) != off) | ||
1671 | err(1, "Bad seek to sector %llu", out->sector); | ||
1672 | |||
1673 | ret = readv(vblk->fd, iov+1, in_num-1); /* NOTE(review): starting at iov+1 presumably assumes exactly one output buffer (the header); in_num-1 leaves the status buffer out - confirm */ | ||
1674 | verbose("READ from sector %llu: %i\n", out->sector, ret); | ||
1675 | if (ret >= 0) { | ||
1676 | wlen = sizeof(*in) + ret; /* status byte plus the data just read */ | ||
1677 | *in = VIRTIO_BLK_S_OK; | ||
1678 | } else { | ||
1679 | wlen = sizeof(*in); | ||
1680 | *in = VIRTIO_BLK_S_IOERR; | ||
1681 | } | ||
1682 | } | ||
1683 | |||
1684 | /* Finished that request. */ | ||
1685 | add_used(vq, head, wlen); | ||
1686 | } | ||
1687 | |||
1688 | /*L:198 This actually sets up a virtual block device. */ | ||
1689 | static void setup_block_file(const char *filename) | ||
1690 | { | ||
1691 | struct device *dev; | ||
1692 | struct vblk_info *vblk; | ||
1693 | struct virtio_blk_config conf; | ||
1694 | |||
1695 | /* Create the device. */ | ||
1696 | dev = new_device("block", VIRTIO_ID_BLOCK); | ||
1697 | |||
1698 | /* The device has one virtqueue, where the Guest places requests. */ | ||
1699 | add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); | ||
1700 | |||
1701 | /* Allocate the room for our own bookkeeping. NOTE(review): the malloc result is used unchecked. */ | ||
1702 | vblk = dev->priv = malloc(sizeof(*vblk)); | ||
1703 | |||
1704 | /* First we open the file and store the length. NOTE(review): the lseek64 result is not checked for failure. */ | ||
1705 | vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); | ||
1706 | vblk->len = lseek64(vblk->fd, 0, SEEK_END); | ||
1707 | |||
1708 | /* We support FLUSH. */ | ||
1709 | add_feature(dev, VIRTIO_BLK_F_FLUSH); | ||
1710 | |||
1711 | /* Tell Guest how many sectors this device has (512-byte sectors; the division rounds down). */ | ||
1712 | conf.capacity = cpu_to_le64(vblk->len / 512); | ||
1713 | |||
1714 | /* | ||
1715 | * Tell Guest not to put in too many descriptors at once: two are used | ||
1716 | * for the in and out elements. | ||
1717 | */ | ||
1718 | add_feature(dev, VIRTIO_BLK_F_SEG_MAX); | ||
1719 | conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); | ||
1720 | |||
1721 | /* Don't try to put whole struct: we have 8 bit limit. Only the bytes before the geometry field are passed on. */ | ||
1722 | set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); | ||
1723 | |||
1724 | /* NOTE(review): the device count is bumped inside the verbose() argument list; if verbose() skips its arguments when quiet, the increment is skipped too - confirm. */ | ||
1724 | verbose("device %u: virtblock %llu sectors\n", | ||
1725 | ++devices.device_num, le64_to_cpu(conf.capacity)); | ||
1726 | } | ||
1727 | |||
1728 | /*L:211 | ||
1729 | * Our random number generator device reads from /dev/random into the Guest's | ||
1730 | * input buffers. The usual case is that the Guest doesn't want random numbers | ||
1731 | * and so has no buffers although /dev/random is still readable, whereas | ||
1732 | * console is the reverse. | ||
1733 | * | ||
1734 | * The same logic applies, however. | ||
1735 | */ | ||
1736 | struct rng_info { | ||
1737 | int rfd; /* file descriptor for /dev/random: set by setup_rng(), read from by rng_input() */ | ||
1738 | }; | ||
1739 | |||
1740 | static void rng_input(struct virtqueue *vq) | ||
1741 | { | ||
1742 | int len; | ||
1743 | unsigned int head, in_num, out_num, totlen = 0; /* totlen: running total of bytes read, reported via add_used() */ | ||
1744 | struct rng_info *rng_info = vq->dev->priv; | ||
1745 | struct iovec iov[vq->vring.num]; /* variable-length array with one slot per ring entry */ | ||
1746 | |||
1747 | /* First we need a buffer from the Guest's virtqueue. It must consist of input buffers only. */ | ||
1748 | head = wait_for_vq_desc(vq, iov, &out_num, &in_num); | ||
1749 | if (out_num) | ||
1750 | errx(1, "Output buffers in rng?"); | ||
1751 | |||
1752 | /* | ||
1753 | * Just like the console write, we loop to cover the whole iovec. | ||
1754 | * In this case, short reads actually happen quite a bit. | ||
1755 | * A read result of zero or less is treated as fatal. | ||
1755 | */ | ||
1756 | while (!iov_empty(iov, in_num)) { | ||
1757 | len = readv(rng_info->rfd, iov, in_num); | ||
1758 | if (len <= 0) | ||
1759 | err(1, "Read from /dev/random gave %i", len); | ||
1760 | iov_consume(iov, in_num, len); /* presumably advances iov past the len bytes just filled - confirm */ | ||
1761 | totlen += len; | ||
1762 | } | ||
1763 | |||
1764 | /* Tell the Guest about the new input. */ | ||
1765 | add_used(vq, head, totlen); | ||
1766 | } | ||
1767 | |||
1768 | /*L:199 | ||
1769 | * This creates a "hardware" random number device for the Guest. | ||
1770 | */ | ||
1771 | static void setup_rng(void) | ||
1772 | { | ||
1773 | struct device *dev; | ||
1774 | struct rng_info *rng_info = malloc(sizeof(*rng_info)); /* NOTE(review): malloc result is dereferenced below without a NULL check */ | ||
1775 | |||
1776 | /* Our device's private info simply contains the /dev/random fd. */ | ||
1777 | rng_info->rfd = open_or_die("/dev/random", O_RDONLY); | ||
1778 | |||
1779 | /* Create the new device. */ | ||
1780 | dev = new_device("rng", VIRTIO_ID_RNG); | ||
1781 | dev->priv = rng_info; | ||
1782 | |||
1783 | /* The device has one virtqueue, where the Guest places inbufs. */ | ||
1784 | add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); | ||
1785 | |||
1786 | verbose("device %u: rng\n", devices.device_num++); /* NOTE(review): post-increment prints the old count, whereas setup_tun_net() and setup_block_file() print the new one - confirm which is intended */ | ||
1787 | } | ||
1788 | /* That's the end of device setup. */ | ||
1789 | |||
1790 | /*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ | ||
1791 | static void __attribute__((noreturn)) restart_guest(void) | ||
1792 | { | ||
1793 | unsigned int i; | ||
1794 | |||
1795 | /* | ||
1796 | * Since we don't track all open fds, we simply close everything beyond | ||
1797 | * stderr (descriptors 3 up to FD_SETSIZE-1). | ||
1798 | */ | ||
1799 | for (i = 3; i < FD_SETSIZE; i++) | ||
1800 | close(i); | ||
1801 | |||
1802 | /* Reset all the devices (kills all threads). */ | ||
1803 | cleanup_devices(); | ||
1804 | |||
1805 | execv(main_args[0], main_args); /* re-run ourselves with the argv saved by main() */ | ||
1806 | err(1, "Could not exec %s", main_args[0]); /* only reached if execv() failed */ | ||
1807 | } | ||
1808 | |||
1809 | /*L:220 | ||
1810 | * Finally we reach the core of the Launcher which runs the Guest, serves | ||
1811 | * its input and output, and finally, lays it to rest. | ||
1812 | */ | ||
1813 | static void __attribute__((noreturn)) run_guest(void) | ||
1814 | { | ||
1815 | for (;;) { | ||
1816 | unsigned long notify_addr; | ||
1817 | int readval; | ||
1818 | |||
1819 | /* We read from the /dev/lguest device to run the Guest; the result buffer is notify_addr itself. */ | ||
1820 | readval = pread(lguest_fd, &notify_addr, | ||
1821 | sizeof(notify_addr), cpu_id); | ||
1822 | |||
1823 | /* One unsigned long means the Guest did HCALL_NOTIFY */ | ||
1824 | if (readval == sizeof(notify_addr)) { | ||
1825 | verbose("Notify on address %#lx\n", notify_addr); | ||
1826 | handle_output(notify_addr); | ||
1827 | /* ENOENT means the Guest died. Reading tells us why. NOTE(review): errno is consulted for any other readval, even one where pread did not fail and errno may be stale. */ | ||
1828 | } else if (errno == ENOENT) { | ||
1829 | char reason[1024] = { 0 }; /* zero-filled, and one byte is held back below, so it stays NUL-terminated */ | ||
1830 | pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); | ||
1831 | errx(1, "%s", reason); | ||
1832 | /* ERESTART means that we need to reboot the guest */ | ||
1833 | } else if (errno == ERESTART) { | ||
1834 | restart_guest(); | ||
1835 | /* Anything else means a bug or incompatible change. */ | ||
1836 | } else | ||
1837 | err(1, "Running guest failed"); | ||
1838 | } | ||
1839 | } | ||
1840 | /*L:240 | ||
1841 | * This is the end of the Launcher. The good news: we are over halfway | ||
1842 | * through! The bad news: the most fiendish part of the code still lies ahead | ||
1843 | * of us. | ||
1844 | * | ||
1845 | * Are you ready? Take a deep breath and join me in the core of the Host, in | ||
1846 | * "make Host". | ||
1847 | :*/ | ||
1848 | |||
1849 | static struct option opts[] = { /* long options for getopt_long(): name, has_arg (0 = none, 1 = required), flag, value returned */ | ||
1850 | { "verbose", 0, NULL, 'v' }, | ||
1851 | { "tunnet", 1, NULL, 't' }, | ||
1852 | { "block", 1, NULL, 'b' }, | ||
1853 | { "rng", 0, NULL, 'r' }, | ||
1854 | { "initrd", 1, NULL, 'i' }, | ||
1855 | { "username", 1, NULL, 'u' }, | ||
1856 | { "chroot", 1, NULL, 'c' }, | ||
1857 | { NULL }, /* all-zero entry terminates the table */ | ||
1858 | }; | ||
1859 | static void usage(void) | ||
1860 | { /* Prints the usage text via errx(1, ...). NOTE(review): opts[] also accepts --rng, --username and --chroot, which this text does not mention. */ | ||
1861 | errx(1, "Usage: lguest [--verbose] " | ||
1862 | "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n" | ||
1863 | "|--block=<filename>|--initrd=<filename>]...\n" | ||
1864 | "<mem-in-mb> vmlinux [args...]"); | ||
1865 | } | ||
1866 | |||
1867 | /*L:105 The main routine is where the real work begins: */ | ||
1868 | int main(int argc, char *argv[]) | ||
1869 | { | ||
1870 | /* Memory, code startpoint and size of the (optional) initrd. */ | ||
1871 | unsigned long mem = 0, start, initrd_size = 0; | ||
1872 | /* Two temporaries. */ | ||
1873 | int i, c; | ||
1874 | /* The boot information for the Guest. */ | ||
1875 | struct boot_params *boot; | ||
1876 | /* If they specify an initrd file to load. */ | ||
1877 | const char *initrd_name = NULL; | ||
1878 | |||
1879 | /* Password structure for initgroups/setres[gu]id */ | ||
1880 | struct passwd *user_details = NULL; | ||
1881 | |||
1882 | /* Directory to chroot to */ | ||
1883 | char *chroot_path = NULL; | ||
1884 | |||
1885 | /* Save the args: we "reboot" by execing ourselves again. */ | ||
1886 | main_args = argv; | ||
1887 | |||
1888 | /* | ||
1889 | * First we initialize the device list. We keep a pointer to the last | ||
1890 | * device, and the next interrupt number to use for devices (1: | ||
1891 | * remember that 0 is used by the timer). | ||
1892 | */ | ||
1893 | devices.lastdev = NULL; | ||
1894 | devices.next_irq = 1; | ||
1895 | |||
1896 | /* We're CPU 0. In fact, that's the only CPU possible right now. */ | ||
1897 | cpu_id = 0; | ||
1898 | |||
1899 | /* | ||
1900 | * We need to know how much memory so we can set up the device | ||
1901 | * descriptor and memory pages for the devices as we parse the command | ||
1902 | * line. So we quickly look through the arguments to find the amount | ||
1903 | * of memory now. | ||
1904 | */ | ||
1905 | for (i = 1; i < argc; i++) { | ||
1906 | if (argv[i][0] != '-') { /* the first argument not starting with '-' is taken as <mem-in-mb> */ | ||
1907 | mem = atoi(argv[i]) * 1024 * 1024; /* NOTE(review): atoi() reports no errors, and the multiplication is done in int before widening, so large values can overflow */ | ||
1908 | /* | ||
1909 | * We start by mapping anonymous pages over all of | ||
1910 | * guest-physical memory range. This fills it with 0, | ||
1911 | * and ensures that the Guest won't be killed when it | ||
1912 | * tries to access it. | ||
1913 | */ | ||
1914 | guest_base = map_zeroed_pages(mem / getpagesize() | ||
1915 | + DEVICE_PAGES); | ||
1916 | guest_limit = mem; | ||
1917 | guest_max = mem + DEVICE_PAGES*getpagesize(); | ||
1918 | devices.descpage = get_pages(1); | ||
1919 | break; | ||
1920 | } | ||
1921 | } | ||
1922 | |||
1923 | /* The options are fairly straight-forward. NOTE(review): devices are set up here, before the argument-count check further down and even if no memory size was found above. */ | ||
1924 | while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { | ||
1925 | switch (c) { | ||
1926 | case 'v': | ||
1927 | verbose = true; | ||
1928 | break; | ||
1929 | case 't': | ||
1930 | setup_tun_net(optarg); | ||
1931 | break; | ||
1932 | case 'b': | ||
1933 | setup_block_file(optarg); | ||
1934 | break; | ||
1935 | case 'r': | ||
1936 | setup_rng(); | ||
1937 | break; | ||
1938 | case 'i': | ||
1939 | initrd_name = optarg; | ||
1940 | break; | ||
1941 | case 'u': | ||
1942 | user_details = getpwnam(optarg); | ||
1943 | if (!user_details) | ||
1944 | err(1, "getpwnam failed, incorrect username?"); | ||
1945 | break; | ||
1946 | case 'c': | ||
1947 | chroot_path = optarg; | ||
1948 | break; | ||
1949 | default: | ||
1950 | warnx("Unknown argument %s", argv[optind]); /* NOTE(review): optind is the index of the next element to process, so this may name the following argument or be NULL - confirm */ | ||
1951 | usage(); | ||
1952 | } | ||
1953 | } | ||
1954 | /* | ||
1955 | * After the other arguments we expect memory and kernel image name, | ||
1956 | * followed by command line arguments for the kernel. | ||
1957 | */ | ||
1958 | if (optind + 2 > argc) | ||
1959 | usage(); | ||
1960 | |||
1961 | verbose("Guest base is at %p\n", guest_base); | ||
1962 | |||
1963 | /* We always have a console device */ | ||
1964 | setup_console(); | ||
1965 | |||
1966 | /* Now we load the kernel (argv[optind] is the memory size, argv[optind+1] the kernel image). */ | ||
1967 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); | ||
1968 | |||
1969 | /* Boot information is stashed at physical address 0 */ | ||
1970 | boot = from_guest_phys(0); | ||
1971 | |||
1972 | /* Map the initrd image if requested (at top of physical memory) */ | ||
1973 | if (initrd_name) { | ||
1974 | initrd_size = load_initrd(initrd_name, mem); | ||
1975 | /* | ||
1976 | * These are the location in the Linux boot header where the | ||
1977 | * start and size of the initrd are expected to be found. | ||
1978 | */ | ||
1979 | boot->hdr.ramdisk_image = mem - initrd_size; | ||
1980 | boot->hdr.ramdisk_size = initrd_size; | ||
1981 | /* The bootloader type 0xFF means "unknown"; that's OK. */ | ||
1982 | boot->hdr.type_of_loader = 0xFF; | ||
1983 | } | ||
1984 | |||
1985 | /* | ||
1986 | * The Linux boot header contains an "E820" memory map: ours is a | ||
1987 | * simple, single region covering guest-physical 0 up to mem. | ||
1988 | */ | ||
1989 | boot->e820_entries = 1; | ||
1990 | boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); | ||
1991 | /* | ||
1992 | * The boot header contains a command line pointer: we put the command | ||
1993 | * line after the boot header. | ||
1994 | */ | ||
1995 | boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); | ||
1996 | /* We use a simple helper to copy the arguments separated by spaces. */ | ||
1997 | concat((char *)(boot + 1), argv+optind+2); | ||
1998 | |||
1999 | /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */ | ||
2000 | boot->hdr.kernel_alignment = 0x1000000; | ||
2001 | |||
2002 | /* Boot protocol version: 2.07 supports the fields for lguest. */ | ||
2003 | boot->hdr.version = 0x207; | ||
2004 | |||
2005 | /* The hardware_subarch value of "1" tells the Guest it's an lguest. */ | ||
2006 | boot->hdr.hardware_subarch = 1; | ||
2007 | |||
2008 | /* Tell the entry path not to try to reload segment registers. */ | ||
2009 | boot->hdr.loadflags |= KEEP_SEGMENTS; | ||
2010 | |||
2011 | /* We tell the kernel to initialize the Guest. */ | ||
2012 | tell_kernel(start); | ||
2013 | |||
2014 | /* Ensure that we terminate if a device-servicing child dies. */ | ||
2015 | signal(SIGCHLD, kill_launcher); | ||
2016 | |||
2017 | /* If we exit via err(), this kills all the threads, restores tty. */ | ||
2018 | atexit(cleanup_devices); | ||
2019 | |||
2020 | /* If requested, chroot to a directory (this happens before privileges are dropped below) */ | ||
2021 | if (chroot_path) { | ||
2022 | if (chroot(chroot_path) != 0) | ||
2023 | err(1, "chroot(\"%s\") failed", chroot_path); | ||
2024 | |||
2025 | if (chdir("/") != 0) | ||
2026 | err(1, "chdir(\"/\") failed"); | ||
2027 | |||
2028 | verbose("chroot done\n"); | ||
2029 | } | ||
2030 | |||
2031 | /* If requested, drop privileges: supplementary groups first, then gid, then uid */ | ||
2032 | if (user_details) { | ||
2033 | uid_t u; | ||
2034 | gid_t g; | ||
2035 | |||
2036 | u = user_details->pw_uid; | ||
2037 | g = user_details->pw_gid; | ||
2038 | |||
2039 | if (initgroups(user_details->pw_name, g) != 0) | ||
2040 | err(1, "initgroups failed"); | ||
2041 | |||
2042 | if (setresgid(g, g, g) != 0) | ||
2043 | err(1, "setresgid failed"); | ||
2044 | |||
2045 | if (setresuid(u, u, u) != 0) | ||
2046 | err(1, "setresuid failed"); | ||
2047 | |||
2048 | verbose("Dropping privileges completed\n"); | ||
2049 | } | ||
2050 | |||
2051 | /* Finally, run the Guest. This doesn't return (run_guest() is declared noreturn), hence no return statement. */ | ||
2052 | run_guest(); | ||
2053 | } | ||
2054 | /*:*/ | ||
2055 | |||
2056 | /*M:999 | ||
2057 | * Mastery is done: you now know everything I do. | ||
2058 | * | ||
2059 | * But surely you have seen code, features and bugs in your wanderings which | ||
2060 | * you now yearn to attack? That is the real game, and I look forward to you | ||
2061 | * patching and forking lguest into the Your-Name-Here-visor. | ||
2062 | * | ||
2063 | * Farewell, and good coding! | ||
2064 | * Rusty Russell. | ||
2065 | */ | ||
diff --git a/Documentation/virtual/lguest/lguest.txt b/Documentation/virtual/lguest/lguest.txt deleted file mode 100644 index bff0c554485d..000000000000 --- a/Documentation/virtual/lguest/lguest.txt +++ /dev/null | |||
@@ -1,129 +0,0 @@ | |||
1 | __ | ||
2 | (___()'`; Rusty's Remarkably Unreliable Guide to Lguest | ||
3 | /, /` - or, A Young Coder's Illustrated Hypervisor | ||
4 | \\"--\\ http://lguest.ozlabs.org | ||
5 | |||
6 | Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, | ||
7 | for Linux developers and users to experiment with virtualization with the | ||
8 | minimum of complexity. Nonetheless, it should have sufficient features to | ||
9 | make it useful for specific tasks, and, of course, you are encouraged to fork | ||
10 | and enhance it (see drivers/lguest/README). | ||
11 | |||
12 | Features: | ||
13 | |||
14 | - Kernel module which runs in a normal kernel. | ||
15 | - Simple I/O model for communication. | ||
16 | - Simple program to create new guests. | ||
17 | - Logo contains cute puppies: http://lguest.ozlabs.org | ||
18 | |||
19 | Developer features: | ||
20 | |||
21 | - Fun to hack on. | ||
22 | - No ABI: being tied to a specific kernel anyway, you can change anything. | ||
23 | - Many opportunities for improvement or feature implementation. | ||
24 | |||
25 | Running Lguest: | ||
26 | |||
27 | - The easiest way to run lguest is to use the same kernel as guest and host. | ||
28 | You can configure them differently, but usually it's easiest not to. | ||
29 | |||
30 | You will need to configure your kernel with the following options: | ||
31 | |||
32 | "General setup": | ||
33 | "Prompt for development and/or incomplete code/drivers" = Y | ||
34 | (CONFIG_EXPERIMENTAL=y) | ||
35 | |||
36 | "Processor type and features": | ||
37 | "Paravirtualized guest support" = Y | ||
38 | "Lguest guest support" = Y | ||
39 | "High Memory Support" = off/4GB | ||
40 | "Alignment value to which kernel should be aligned" = 0x100000 | ||
41 | (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and | ||
42 | CONFIG_PHYSICAL_ALIGN=0x100000) | ||
43 | |||
44 | "Device Drivers": | ||
45 | "Block devices" | ||
46 | "Virtio block driver (EXPERIMENTAL)" = M/Y | ||
47 | "Network device support" | ||
48 | "Universal TUN/TAP device driver support" = M/Y | ||
49 | "Virtio network driver (EXPERIMENTAL)" = M/Y | ||
50 | (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m) | ||
51 | |||
52 | "Virtualization" | ||
53 | "Linux hypervisor example code" = M/Y | ||
54 | (CONFIG_LGUEST=m) | ||
55 | |||
56 | - A tool called "lguest" is available in this directory: type "make" | ||
57 | to build it. If you didn't build your kernel in-tree, use "make | ||
58 | O=<builddir>". | ||
59 | |||
60 | - Create or find a root disk image. There are several useful ones | ||
61 | around, such as the xm-test tiny root image at | ||
62 | http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img | ||
63 | |||
64 | For more serious work, I usually use a distribution ISO image and | ||
65 | install it under qemu, then make multiple copies: | ||
66 | |||
67 | dd if=/dev/zero of=rootfile bs=1M count=2048 | ||
68 | qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d | ||
69 | |||
70 | Make sure that you install a getty on /dev/hvc0 if you want to log in on the | ||
71 | console! | ||
72 | |||
73 | - "modprobe lg" if you built it as a module. | ||
74 | |||
75 | - Run an lguest as root: | ||
76 | |||
77 | Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ | ||
78 | --block=rootfile root=/dev/vda | ||
79 | |||
80 | Explanation: | ||
81 | 64: the amount of memory to use, in MB. | ||
82 | |||
83 | vmlinux: the kernel image found in the top of your build directory. You | ||
84 | can also use a standard bzImage. | ||
85 | |||
86 | --tunnet=192.168.19.1: configures a "tap" device for networking with this | ||
87 | IP address. | ||
88 | |||
89 | --block=rootfile: a file or block device which becomes /dev/vda | ||
90 | inside the guest. | ||
91 | |||
92 | root=/dev/vda: this (and anything else on the command line) are | ||
93 | kernel boot parameters. | ||
94 | |||
95 | - Configuring networking. I usually have the host masquerade, using | ||
96 | "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 > | ||
97 | /proc/sys/net/ipv4/ip_forward". In this example, I would configure | ||
98 | eth0 inside the guest at 192.168.19.2. | ||
99 | |||
100 | Another method is to bridge the tap device to an external interface | ||
101 | using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest | ||
102 | to obtain an IP address. The bridge needs to be configured first: | ||
103 | this option simply adds the tap interface to it. | ||
104 | |||
105 | A simple example on my system: | ||
106 | |||
107 | ifconfig eth0 0.0.0.0 | ||
108 | brctl addbr lg0 | ||
109 | ifconfig lg0 up | ||
110 | brctl addif lg0 eth0 | ||
111 | dhclient lg0 | ||
112 | |||
113 | Then use --tunnet=bridge:lg0 when launching the guest. | ||
114 | |||
115 | See: | ||
116 | |||
117 | http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge | ||
118 | |||
119 | for general information on how to get bridging to work. | ||
120 | |||
121 | - Random number generation. Using the --rng option will provide a | ||
122 | /dev/hwrng in the guest that will read from the host's /dev/random. | ||
123 | Use this option in conjunction with rng-tools (see ../hw_random.txt) | ||
124 | to provide entropy to the guest kernel's /dev/random. | ||
125 | |||
126 | There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest | ||
127 | |||
128 | Good luck! | ||
129 | Rusty Russell rusty@rustcorp.com.au. | ||
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index f464f47bc60d..6752870c4970 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt | |||
@@ -117,7 +117,7 @@ can be influenced by kernel parameters: | |||
117 | 117 | ||
118 | slub_min_objects=x (default 4) | 118 | slub_min_objects=x (default 4) |
119 | slub_min_order=x (default 0) | 119 | slub_min_order=x (default 0) |
120 | slub_max_order=x (default 1) | 120 | slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER)) |
121 | 121 | ||
122 | slub_min_objects allows to specify how many objects must at least fit | 122 | slub_min_objects allows to specify how many objects must at least fit |
123 | into one slab in order for the allocation order to be acceptable. | 123 | into one slab in order for the allocation order to be acceptable. |
@@ -131,7 +131,10 @@ slub_min_objects. | |||
131 | slub_max_order specifies the order at which slub_min_objects should no | 131 | slub_max_order specifies the order at which slub_min_objects should no |
132 | longer be checked. This is useful to avoid SLUB trying to generate | 132 | longer be checked. This is useful to avoid SLUB trying to generate |
133 | super large order pages to fit slub_min_objects of a slab cache with | 133 | super large order pages to fit slub_min_objects of a slab cache with |
134 | large object sizes into one high order page. | 134 | large object sizes into one high order page. Setting the command line |
135 | parameter debug_guardpage_minorder=N (N > 0) forces | ||
136 | slub_max_order to 0, which causes slabs to be allocated at the | ||
137 | minimum possible order. | ||
135 | 138 | ||
136 | SLUB Debug output | 139 | SLUB Debug output |
137 | ----------------- | 140 | ----------------- |
diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX index fc51128071c2..fc9082a1477a 100644 --- a/Documentation/watchdog/00-INDEX +++ b/Documentation/watchdog/00-INDEX | |||
@@ -1,5 +1,7 @@ | |||
1 | 00-INDEX | 1 | 00-INDEX |
2 | - this file. | 2 | - this file. |
3 | convert_drivers_to_kernel_api.txt | ||
4 | - how-to for converting old watchdog drivers to the new kernel API. | ||
3 | hpwdt.txt | 5 | hpwdt.txt |
4 | - information on the HP iLO2 NMI watchdog | 6 | - information on the HP iLO2 NMI watchdog |
5 | pcwd-watchdog.txt | 7 | pcwd-watchdog.txt |
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt index ae1e90036d06..be8119bb15d2 100644 --- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt +++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt | |||
@@ -163,6 +163,25 @@ Here is a simple example for a watchdog device: | |||
163 | +}; | 163 | +}; |
164 | 164 | ||
165 | 165 | ||
166 | Handle the 'nowayout' feature | ||
167 | ----------------------------- | ||
168 | |||
169 | A few drivers use nowayout statically, i.e. there is no module parameter for it | ||
170 | and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be | ||
171 | used. This needs to be converted by initializing the status variable of the | ||
172 | watchdog_device like this: | ||
173 | |||
174 | .status = WATCHDOG_NOWAYOUT_INIT_STATUS, | ||
175 | |||
176 | Most drivers, however, also allow runtime configuration of nowayout, usually | ||
177 | by adding a module parameter. The conversion for this would be something like: | ||
178 | |||
179 | watchdog_set_nowayout(&s3c2410_wdd, nowayout); | ||
180 | |||
181 | The module parameter itself needs to stay, everything else related to nowayout | ||
182 | can go, though. This will likely be some code in open(), close() or write(). | ||
183 | |||
184 | |||
166 | Register the watchdog device | 185 | Register the watchdog device |
167 | ---------------------------- | 186 | ---------------------------- |
168 | 187 | ||
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 4f7c894244d2..4b93c28e35c6 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | The Linux WatchDog Timer Driver Core kernel API. | 1 | The Linux WatchDog Timer Driver Core kernel API. |
2 | =============================================== | 2 | =============================================== |
3 | Last reviewed: 22-Jul-2011 | 3 | Last reviewed: 29-Nov-2011 |
4 | 4 | ||
5 | Wim Van Sebroeck <wim@iguana.be> | 5 | Wim Van Sebroeck <wim@iguana.be> |
6 | 6 | ||
@@ -142,6 +142,14 @@ bit-operations. The status bits that are defined are: | |||
142 | * WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog. | 142 | * WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog. |
143 | If this bit is set then the watchdog timer will not be able to stop. | 143 | If this bit is set then the watchdog timer will not be able to stop. |
144 | 144 | ||
145 | To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog | ||
146 | timer device) you can either: | ||
147 | * set it statically in your watchdog_device struct with | ||
148 | .status = WATCHDOG_NOWAYOUT_INIT_STATUS, | ||
149 | (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or | ||
150 | * use the following helper function: | ||
151 | static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout) | ||
152 | |||
145 | Note: The WatchDog Timer Driver Core supports the magic close feature and | 153 | Note: The WatchDog Timer Driver Core supports the magic close feature and |
146 | the nowayout feature. To use the magic close feature you must set the | 154 | the nowayout feature. To use the magic close feature you must set the |
147 | WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure. | 155 | WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure. |